summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--decoder/apply_models.cc53
-rw-r--r--decoder/cdec_ff.cc2
-rw-r--r--decoder/ff.h19
-rw-r--r--decoder/ff_const_reorder.cc54
-rw-r--r--decoder/ff_const_reorder.h14
-rw-r--r--decoder/ffset.cc58
-rw-r--r--decoder/ffset.h17
-rw-r--r--utils/Makefile.am4
-rw-r--r--utils/alignment.h18
-rw-r--r--utils/argument_reorder_model.cc46
-rw-r--r--utils/argument_reorder_model.h73
-rw-r--r--utils/constituent_reorder_model.cc239
-rw-r--r--utils/lbfgs.h3
-rw-r--r--utils/maxent.cpp2
-rw-r--r--utils/srl_sentence.h31
-rw-r--r--utils/synutils.h18
-rw-r--r--utils/tree.h55
-rw-r--r--utils/tsuruoka_maxent.h49
19 files changed, 273 insertions, 483 deletions
diff --git a/.gitignore b/.gitignore
index 4efcdd8d..c75328dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -181,6 +181,7 @@ training/mr_reduce_to_weights
training/optimize_test
training/plftools
training/test_ngram
+utils/argument_reorder_model_trainer
utils/atools
utils/bin/
utils/const_reorder_model_trainer
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 3f3f6a79..18c83fd4 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -233,24 +233,19 @@ public:
void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {
Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);
new_edge->edge_prob_ = item->out_edge_.edge_prob_;
- //start: new code by lijunhui
- FFState real_state;
- FFState* real_state_ref;
- if (models.HaveEraseState()) {
- models.GetRealFFState(item->state_, real_state);
- real_state_ref = &real_state;
+
+ Candidate** o_item_ptr = nullptr;
+ if (item->state_.size() && models.NeedsStateErasure()) {
+ // When erasure of certain state bytes is needed, we must make a copy of
+ // the state instead of doing the erasure in-place because future
+ // candidates may require the information in the bytes to be erased.
+ FFState state(item->state_);
+ models.EraseIgnoredBytes(&state);
+ o_item_ptr = &(*s2n)[state];
+ } else {
+ o_item_ptr = &(*s2n)[item->state_];
}
- else
- real_state_ref = &(item->state_);
- Candidate*& o_item = (*s2n)[(*real_state_ref)];
- /*FFState real_state;
- models.GetRealFFState(item->state_, real_state);
- Candidate*& o_item = (*s2n)[real_state];*/
- //end: new code by lijunhui
-
- //start: original code
- //Candidate*& o_item = (*s2n)[item->state_];
- //end: original code
+ Candidate*& o_item = *o_item_ptr;
if (!o_item) o_item = item;
@@ -272,18 +267,17 @@ public:
// score is the same for all items with a common residual DP
// state
if (item->vit_prob_ > o_item->vit_prob_) {
- //start: new code by lijunhui
- if (models.HaveEraseState()) {
- assert(models.GetRealFFState(o_item->state_) == models.GetRealFFState(item->state_)); // sanity check!
+ if (item->state_.size() && models.NeedsStateErasure()) {
+ // node_states_ should still point to the unerased state.
node_states_[o_item->node_index_] = item->state_;
+ // sanity check!
+ FFState item_state(item->state_), o_item_state(o_item->state_);
+ models.EraseIgnoredBytes(&item_state);
+ models.EraseIgnoredBytes(&o_item_state);
+ assert(item_state == o_item_state);
} else {
- assert(o_item->state_ == item->state_); // sanity check!
+ assert(o_item->state_ == item->state_); // sanity check!
}
- //end: new code by lijunhui
-
- //start: original code
- //assert(o_item->state_ == item->state_); // sanity check!
- //end: original code
o_item->est_prob_ = item->est_prob_;
o_item->vit_prob_ = item->vit_prob_;
@@ -629,9 +623,10 @@ void ApplyModelSet(const Hypergraph& in,
if (models.stateless() || config.algorithm == IntersectionConfiguration::FULL) {
NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state
ma.Apply();
- } else if (config.algorithm == IntersectionConfiguration::CUBE
- || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING
- || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
+ } else if (config.algorithm == IntersectionConfiguration::CUBE ||
+ config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING ||
+ config.algorithm ==
+ IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
int pl = config.pop_limit;
const int max_pl_for_large=50;
if (pl > max_pl_for_large && in.nodes_.size() > 80000) {
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index e7a8bbc1..6f7227aa 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -78,6 +78,6 @@ void register_feature_functions() {
ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
ff_registry.Register("WordSet", new FFFactory<WordSet>);
- ff_registry.Register("ConstReorderFeature", new ConstReorderFeatureFactory());
+ ff_registry.Register("ConstReorderFeature", new FFFactory<ConstReorderFeature>);
ff_registry.Register("External", new FFFactory<ExternalFeature>);
}
diff --git a/decoder/ff.h b/decoder/ff.h
index 3280592e..afa3dbca 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -17,11 +17,17 @@ class FeatureFunction {
friend class ExternalFeature;
public:
std::string name_; // set by FF factory using usage()
- FeatureFunction() : state_size_() {}
- explicit FeatureFunction(int state_size) : state_size_(state_size) {}
+ FeatureFunction() : state_size_(), ignored_state_size_() {}
+ explicit FeatureFunction(int state_size, int ignored_state_size = 0)
+ : state_size_(state_size), ignored_state_size_(ignored_state_size) {}
virtual ~FeatureFunction();
bool IsStateful() const { return state_size_ > 0; }
int StateSize() const { return state_size_; }
+ // Returns the number of bytes in the state that should be ignored during
+ // search. When non-zero, the last N bytes in the state should be ignored when
+ // splitting a hypernode by the state. This allows the feature function to
+ // store some side data and later retrieve it via the state bytes.
+ int IgnoredStateSize() const { return ignored_state_size_; }
// override this. not virtual because we want to expose this to factory template for help before creating a FF
static std::string usage(bool show_params,bool show_details) {
@@ -71,12 +77,17 @@ class FeatureFunction {
SparseVector<double>* estimated_features,
void* context) const;
- // !!! ONLY call this from subclass *CONSTRUCTORS* !!!
+ // !!! ONLY call these from subclass *CONSTRUCTORS* !!!
void SetStateSize(size_t state_size) {
state_size_ = state_size;
}
+
+ void SetIgnoredStateSize(size_t ignored_state_size) {
+ ignored_state_size_ = ignored_state_size;
+ }
+
private:
- int state_size_;
+ int state_size_, ignored_state_size_;
};
#endif
diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc
index 8bd3f4e2..c0269125 100644
--- a/decoder/ff_const_reorder.cc
+++ b/decoder/ff_const_reorder.cc
@@ -9,6 +9,7 @@
#include "hash.h"
#include "argument_reorder_model.h"
+#include <sstream>
#include <string>
#include <vector>
#include <stdio.h>
@@ -187,14 +188,6 @@ struct TargetTranslation {
vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL),
vec_e_align_bit_array_(e_num_word, NULL) {
int len = end_pos - begin_pos + 1;
-
- /*vec_f_align_bit_array_.reserve(len);
- for (int i = 0; i < len; i++)
- vec_f_align_bit_array_.push_back(NULL);
-
- vec_e_align_bit_array_.reserve(e_num_word);
- for (int i = 0; i < e_num_word; i++)
- vec_e_align_bit_array_.push_back(NULL);*/
align_.reserve(1.5 * len);
}
~TargetTranslation() {
@@ -304,17 +297,16 @@ struct TargetTranslation {
if (target_end == -1) target_begin = -1;
}
- const uint16_t begin_pos_, end_pos_; // the position in parse
+ const uint16_t begin_pos_, end_pos_; // the position in parse
const uint16_t input_begin_pos_, input_end_pos_; // the position in input
const uint16_t e_num_words_;
vector<AlignmentPoint*> align_;
private:
- vector<SBitArray*> vec_f_align_bit_array_;
- vector<SBitArray*> vec_e_align_bit_array_;
-
vector<short> vec_left_most_;
vector<short> vec_right_most_;
+ vector<SBitArray*> vec_f_align_bit_array_;
+ vector<SBitArray*> vec_e_align_bit_array_;
};
struct FocusedConstituent {
@@ -710,7 +702,7 @@ struct ConstReorderFeatureImpl {
SparseVector<double>* features,
const TargetTranslation* target_translation,
const vector<const TargetTranslation*>& vec_node,
- std::vector<int>& findex) {
+ std::vector<int>& /*findex*/) {
if (b_srl_block_feature_ || b_srl_order_feature_) {
double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;
for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
@@ -1327,6 +1319,7 @@ struct ConstReorderFeatureImpl {
ConstReorderFeature::ConstReorderFeature(const std::string& param) {
pimpl_ = new ConstReorderFeatureImpl(param);
SetStateSize(ConstReorderFeatureImpl::ReserveStateSize());
+ SetIgnoredStateSize(ConstReorderFeatureImpl::ReserveStateSize());
name_ = "ConstReorderFeature";
}
@@ -1346,12 +1339,32 @@ void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {
void ConstReorderFeature::TraversalFeaturesImpl(
const SentenceMetadata& /* smeta */, const Hypergraph::Edge& edge,
const vector<const void*>& ant_states, SparseVector<double>* features,
- SparseVector<double>* estimated_features, void* state) const {
+ SparseVector<double>* /*estimated_features*/, void* state) const {
pimpl_->SetConstReorderFeature(edge, features, ant_states, state);
}
-string ConstReorderFeature::usage(bool /*param*/, bool /*verbose*/) {
- return "ConstReorderFeature";
+string ConstReorderFeature::usage(bool show_params, bool show_details) {
+ ostringstream out;
+ out << "ConstReorderFeature";
+ if (show_params) {
+ out << " model_file_prefix [const_block=1 const_order=1] [srl_block=0 "
+ "srl_order=0]"
+ << "\nParameters:\n"
+ << " const_{block,order}: enable/disable constituency constraints.\n"
+ << " src_{block,order}: enable/disable semantic role labeling "
+ "constraints.\n";
+ }
+ if (show_details) {
+ out << "\n"
+ << "Soft reordering constraint features from "
+ "http://www.aclweb.org/anthology/P14-1106. To train the classifers, "
+ "use utils/const_reorder_model_trainer for constituency reordering "
+ "constraints and utils/argument_reorder_model_trainer for semantic "
+ "role labeling reordering constraints.\n"
+ << "Input segments should provide path to parse tree (resp. SRL parse) "
+ "as \"parse\" (resp. \"srl\") properties.\n";
+ }
+ return out.str();
}
boost::shared_ptr<FeatureFunction> CreateConstReorderModel(
@@ -1359,12 +1372,3 @@ boost::shared_ptr<FeatureFunction> CreateConstReorderModel(
ConstReorderFeature* ret = new ConstReorderFeature(param);
return boost::shared_ptr<FeatureFunction>(ret);
}
-
-boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(
- std::string param) const {
- return CreateConstReorderModel(param);
-}
-
-std::string ConstReorderFeatureFactory::usage(bool params, bool verbose) const {
- return ConstReorderFeature::usage(params, verbose);
-}
diff --git a/decoder/ff_const_reorder.h b/decoder/ff_const_reorder.h
index 6ed35768..a5be02d0 100644
--- a/decoder/ff_const_reorder.h
+++ b/decoder/ff_const_reorder.h
@@ -13,9 +13,16 @@
struct ConstReorderFeatureImpl;
+// Soft reordering constraint features from
+// http://www.aclweb.org/anthology/P14-1106. To train the classifiers,
+// use utils/const_reorder_model_trainer for constituency reordering
+// constraints and utils/argument_reorder_model_trainer for SRL
+// reordering constraints.
+//
+// Input segments should provide path to parse tree (resp. SRL parse)
+// as "parse" (resp. "srl") properties.
class ConstReorderFeature : public FeatureFunction {
public:
- // param = "filename n"
ConstReorderFeature(const std::string& param);
~ConstReorderFeature();
static std::string usage(bool param, bool verbose);
@@ -33,9 +40,4 @@ class ConstReorderFeature : public FeatureFunction {
ConstReorderFeatureImpl* pimpl_;
};
-struct ConstReorderFeatureFactory : public FactoryBase<FeatureFunction> {
- FP Create(std::string param) const;
- std::string usage(bool params, bool verbose) const;
-};
-
#endif /* FF_CONST_REORDER_H_ */
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
index 488346bc..8ba70389 100644
--- a/decoder/ffset.cc
+++ b/decoder/ffset.cc
@@ -14,25 +14,10 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
for (int i = 0; i < models_.size(); ++i) {
model_state_pos_[i] = state_size_;
state_size_ += models_[i]->StateSize();
- }
-
- //added by lijunhui
- //erase the states for SRLReorderFeature and DepPrnFeatures
- for (int i = 0; i < models_.size(); i++) {
- if (models_[i]->name_ == string("SRLReorderFeature")
- || models_[i]->name_ == string("DepPrnFeatures")
- || models_[i]->name_ == string("SyntacticContextFeature")
- || models_[i]->name_ == string("ArgtReorderFeature")
- || models_[i]->name_ == string("ConstReorderSparseFeature")
- || models_[i]->name_ == string("ConstReorderFeature")) {
- int start_pos = model_state_pos_[i];
- int end_pos;
- if (i == models_.size() - 1)
- end_pos = state_size_;
- else
- end_pos = model_state_pos_[i + 1];
- erase_state_start_pos_.push_back(start_pos);
- erase_state_end_pos_.push_back(end_pos);
+ int num_ignored_bytes = models_[i]->IgnoredStateSize();
+ if (num_ignored_bytes > 0) {
+ ranges_to_erase_.push_back(
+ {state_size_ - num_ignored_bytes, state_size_});
}
}
}
@@ -90,32 +75,13 @@ void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMet
edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
}
-bool ModelSet::HaveEraseState() const {
- if (erase_state_start_pos_.size() == 0) return false;
- return true;
-}
-
-void ModelSet::GetRealFFState(const FFState& state, FFState& real_state) const {
- real_state.resize(state.size());
- for (int i = 0; i < state.size(); i++) {
- real_state[i] = state[i];
- }
-
- if (state.size() == 0)
- return;
- assert(state.size() == state_size_);
+bool ModelSet::NeedsStateErasure() const { return !ranges_to_erase_.empty(); }
- //erase the states for SRLReorderFeature and DepPrnFeatures and SyntacticContextFeature
- for (int i = 0; i < erase_state_start_pos_.size(); i++){
- int start_pos = erase_state_start_pos_[i];
- int end_pos = erase_state_end_pos_[i];
- for (int j = start_pos; j < end_pos; j++)
- real_state[j] = 0;
- }
-}
-
-FFState ModelSet::GetRealFFState(const FFState& state) const {
- FFState real_state;
- GetRealFFState(state, real_state);
- return real_state;
+void ModelSet::EraseIgnoredBytes(FFState* state) const {
+ // TODO: can we memset?
+ for (const auto& range : ranges_to_erase_) {
+ for (int i = range.first; i < range.second; ++i) {
+ (*state)[i] = 0;
+ }
+ }
}
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 0467b01d..a69a75fa 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -1,6 +1,7 @@
#ifndef _FFSET_H_
#define _FFSET_H_
+#include <utility>
#include <vector>
#include "value_array.h"
#include "prob.h"
@@ -47,22 +48,18 @@ class ModelSet {
bool stateless() const { return !state_size_; }
- //added by ljh
- //some states of features are not contextual, but used for storing some useful information for calculate feature val
- //it needs to erase these states
- //this function is only called by IncorporateIntoPlusLMForest(...) in apply_models.cc
- void GetRealFFState(const FFState& state, FFState& real_state) const;
- FFState GetRealFFState(const FFState& state) const;
- bool HaveEraseState() const;
- private:
- std::vector<int> erase_state_start_pos_;
- std::vector<int> erase_state_end_pos_;
+ // Part of a feature state may be used for storing some side data for
+ // calculating feature values but not necessary for splitting hypernodes. Such
+ // bytes need to be erased for hypernode splitting.
+ bool NeedsStateErasure() const;
+ void EraseIgnoredBytes(FFState* state) const;
private:
std::vector<const FeatureFunction*> models_;
const std::vector<double>& weights_;
int state_size_;
std::vector<int> model_state_pos_;
+ std::vector<std::pair<int, int> > ranges_to_erase_;
};
#endif
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 0bd21b2b..53967561 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = reconstruct_weights atools const_reorder_model_trainer
+bin_PROGRAMS = reconstruct_weights atools const_reorder_model_trainer argument_reorder_model_trainer
noinst_PROGRAMS = \
ts \
@@ -108,6 +108,8 @@ atools_LDADD = libutils.a
atools_LDFLAGS = $(STATIC_FLAGS)
const_reorder_model_trainer_SOURCES = constituent_reorder_model.cc
const_reorder_model_trainer_LDADD = libutils.a
+argument_reorder_model_trainer_SOURCES = argument_reorder_model.cc
+argument_reorder_model_trainer_LDADD = libutils.a
phmt_SOURCES = phmt.cc
phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
diff --git a/utils/alignment.h b/utils/alignment.h
index c0648aab..456577ca 100644
--- a/utils/alignment.h
+++ b/utils/alignment.h
@@ -15,8 +15,6 @@
#include "stringlib.h"
-using namespace std;
-
/*
* Note:
* m_vec_s_align.size() may not be equal to the length of source side
@@ -25,12 +23,12 @@ using namespace std;
*
*/
struct SAlignment {
- typedef vector<int> SingleAlign;
+ typedef std::vector<int> SingleAlign;
SAlignment(const char* pszAlign) { fnInitializeAlignment(pszAlign); }
~SAlignment() {}
bool fnIsAligned(int i, bool s) const {
- const vector<SingleAlign>* palign;
+ const std::vector<SingleAlign>* palign;
if (s == true)
palign = &m_vec_s_align;
else
@@ -70,7 +68,7 @@ struct SAlignment {
}
bool fnIsAlignedTightPhrase(int b, int e, bool s, int* pob, int* poe) const {
- const vector<SingleAlign>* palign;
+ const std::vector<SingleAlign>* palign;
if (s == true)
palign = &m_vec_s_align;
else
@@ -97,7 +95,7 @@ struct SAlignment {
* aligned to any word outside source[b, e]
* 3) return "Discon't": otherwise;
*/
- string fnIsContinuous(int b, int e) const {
+ std::string fnIsContinuous(int b, int e) const {
int ob, oe;
fnGetLeftRightMost(b, e, true, ob, oe);
if (ob == -1) return "Unaligned";
@@ -124,7 +122,7 @@ struct SAlignment {
}
private:
- void fnGetLeftRightMost(int b, int e, const vector<SingleAlign>& align,
+ void fnGetLeftRightMost(int b, int e, const std::vector<SingleAlign>& align,
int& ob, int& oe) const {
ob = oe = -1;
for (int i = b; i <= e && i < align.size(); i++) {
@@ -139,7 +137,7 @@ struct SAlignment {
m_vec_s_align.clear();
m_vec_t_align.clear();
- vector<string> terms = SplitOnWhitespace(string(pszAlign));
+ std::vector<std::string> terms = SplitOnWhitespace(std::string(pszAlign));
int si, ti;
for (size_t i = 0; i < terms.size(); i++) {
sscanf(terms[i].c_str(), "%d-%d", &si, &ti);
@@ -167,8 +165,8 @@ struct SAlignment {
}
private:
- vector<SingleAlign> m_vec_s_align; // source side words' alignment
- vector<SingleAlign> m_vec_t_align; // target side words' alignment
+ std::vector<SingleAlign> m_vec_s_align; // source side words' alignment
+ std::vector<SingleAlign> m_vec_t_align; // target side words' alignment
};
struct SAlignmentReader {
diff --git a/utils/argument_reorder_model.cc b/utils/argument_reorder_model.cc
index 58886251..5caf318f 100644
--- a/utils/argument_reorder_model.cc
+++ b/utils/argument_reorder_model.cc
@@ -6,12 +6,18 @@
*/
#include <boost/program_options.hpp>
+#include <iostream>
#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
#include "argument_reorder_model.h"
#include "synutils.h"
#include "tsuruoka_maxent.h"
+using namespace std;
+
inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
const char* pszNewFName) {
SFReader* pFReader = new STxtFileReader(pszFName);
@@ -64,13 +70,13 @@ inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
struct SArgumentReorderTrainer {
SArgumentReorderTrainer(
- const char* pszSRLFname, // source-side srl tree file name
- const char* pszAlignFname, // alignment filename
- const char* pszSourceFname, // source file name
- const char* pszTargetFname, // target file name
+ const char* pszSRLFname, // source-side srl tree file name
+ const char* pszAlignFname, // alignment filename
+ const char* pszSourceFname, // source file name
+ const char* pszTargetFname, // target file name
const char* pszTopPredicateFname, // target file name
- const char* pszInstanceFname, // training instance file name
- const char* pszModelFname, // classifier model file name
+ const char* pszInstanceFname, // training instance file name
+ const char* pszModelFname, // classifier model file name
int iCutoff) {
fnGenerateInstanceFiles(pszSRLFname, pszAlignFname, pszSourceFname,
pszTargetFname, pszTopPredicateFname,
@@ -110,14 +116,14 @@ struct SArgumentReorderTrainer {
}
void fnGenerateInstanceFiles(
- const char* pszSRLFname, // source-side flattened parse tree file name
- const char* pszAlignFname, // alignment filename
+ const char* pszSRLFname, // source-side flattened parse tree file name
+ const char* pszAlignFname, // alignment filename
const char* pszSourceFname, // source file name
const char* pszTargetFname, // target file name
const char* pszTopPredicateFname, // top predicate file name (we only
// consider predicates with 100+
// occurrences
- const char* pszInstanceFname // training instance file name
+ const char* pszInstanceFname // training instance file name
) {
SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
SSrlSentenceReader* pSRLReader = new SSrlSentenceReader(pszSRLFname);
@@ -257,24 +263,24 @@ inline void print_options(std::ostream& out,
if (i) out << ' ';
out << "--" << ds[i]->long_name();
}
- out << '"\n';
+ out << '\n';
}
inline string str(char const* name, po::variables_map const& conf) {
return conf[name].as<string>();
}
//--srl_file /scratch0/mt_exp/gale-align/gale-align.nw.srl.cn --align_file
-///scratch0/mt_exp/gale-align/gale-align.nw.al --source_file
-///scratch0/mt_exp/gale-align/gale-align.nw.cn --target_file
-///scratch0/mt_exp/gale-align/gale-align.nw.en --instance_file
-///scratch0/mt_exp/gale-align/gale-align.nw.argreorder.instance --model_prefix
-///scratch0/mt_exp/gale-align/gale-align.nw.argreorder.model --feature_cutoff 2
+/// scratch0/mt_exp/gale-align/gale-align.nw.al --source_file
+/// scratch0/mt_exp/gale-align/gale-align.nw.cn --target_file
+/// scratch0/mt_exp/gale-align/gale-align.nw.en --instance_file
+/// scratch0/mt_exp/gale-align/gale-align.nw.argreorder.instance --model_prefix
+/// scratch0/mt_exp/gale-align/gale-align.nw.argreorder.model --feature_cutoff 2
//--srl_file /scratch0/mt_exp/gale-ctb/gale-ctb.srl.cn --align_file
-///scratch0/mt_exp/gale-ctb/gale-ctb.align --source_file
-///scratch0/mt_exp/gale-ctb/gale-ctb.cn --target_file
-///scratch0/mt_exp/gale-ctb/gale-ctb.en0 --instance_file
-///scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.instance --model_prefix
-///scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.model --feature_cutoff 2
+/// scratch0/mt_exp/gale-ctb/gale-ctb.align --source_file
+/// scratch0/mt_exp/gale-ctb/gale-ctb.cn --target_file
+/// scratch0/mt_exp/gale-ctb/gale-ctb.en0 --instance_file
+/// scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.instance --model_prefix
+/// scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.model --feature_cutoff 2
int main(int argc, char** argv) {
po::options_description opts("Configuration options");
diff --git a/utils/argument_reorder_model.h b/utils/argument_reorder_model.h
index 062b8841..077fa5ba 100644
--- a/utils/argument_reorder_model.h
+++ b/utils/argument_reorder_model.h
@@ -8,17 +8,20 @@
#ifndef ARGUMENT_REORDER_MODEL_H_
#define ARGUMENT_REORDER_MODEL_H_
+#include <string>
+#include <vector>
+
#include "alignment.h"
#include "tree.h"
#include "srl_sentence.h"
// an argument item or a predicate item (the verb itself)
struct SSRLItem {
- SSRLItem(const STreeItem *tree_item, string role)
+ SSRLItem(const STreeItem *tree_item, std::string role)
: tree_item_(tree_item), role_(role) {}
~SSRLItem() {}
const STreeItem *tree_item_;
- const string role_;
+ const std::string role_;
};
struct SPredicateItem {
@@ -26,11 +29,13 @@ struct SPredicateItem {
: pred_(pred) {
vec_items_.reserve(pred->m_vecArgt.size() + 1);
for (int i = 0; i < pred->m_vecArgt.size(); i++) {
- vec_items_.push_back(new SSRLItem(pred->m_vecArgt[i]->m_pTreeItem,
- string(pred->m_vecArgt[i]->m_pszRole)));
+ vec_items_.push_back(
+ new SSRLItem(pred->m_vecArgt[i]->m_pTreeItem,
+ std::string(pred->m_vecArgt[i]->m_pszRole)));
}
- vec_items_.push_back(new SSRLItem(
- tree->m_vecTerminals[pred->m_iPosition]->m_ptParent, string("Pred")));
+ vec_items_.push_back(
+ new SSRLItem(tree->m_vecTerminals[pred->m_iPosition]->m_ptParent,
+ std::string("Pred")));
sort(vec_items_.begin(), vec_items_.end(), SortFunction);
begin_ = vec_items_[0]->tree_item_->m_iBegin;
@@ -43,7 +48,7 @@ struct SPredicateItem {
return (i->tree_item_->m_iBegin < j->tree_item_->m_iBegin);
}
- vector<SSRLItem *> vec_items_;
+ std::vector<SSRLItem *> vec_items_;
int begin_;
int end_;
const SPredicate *pred_;
@@ -51,13 +56,14 @@ struct SPredicateItem {
struct SArgumentReorderModel {
public:
- static string fnGetBlockOutcome(int iBegin, int iEnd, SAlignment *pAlign) {
+ static std::string fnGetBlockOutcome(int iBegin, int iEnd,
+ SAlignment *pAlign) {
return pAlign->fnIsContinuous(iBegin, iEnd);
}
static void fnGetReorderType(SPredicateItem *pPredItem, SAlignment *pAlign,
- vector<string> &vecStrLeftReorder,
- vector<string> &vecStrRightReorder) {
- vector<int> vecLeft, vecRight;
+ std::vector<std::string> &vecStrLeftReorder,
+ std::vector<std::string> &vecStrRightReorder) {
+ std::vector<int> vecLeft, vecRight;
for (int i = 0; i < pPredItem->vec_items_.size(); i++) {
const STreeItem *pCon1 = pPredItem->vec_items_[i]->tree_item_;
int iLeft1, iRight1;
@@ -66,15 +72,15 @@ struct SArgumentReorderModel {
vecLeft.push_back(iLeft1);
vecRight.push_back(iRight1);
}
- vector<int> vecLeftPosition;
+ std::vector<int> vecLeftPosition;
fnGetRelativePosition(vecLeft, vecLeftPosition);
- vector<int> vecRightPosition;
+ std::vector<int> vecRightPosition;
fnGetRelativePosition(vecRight, vecRightPosition);
vecStrLeftReorder.clear();
vecStrRightReorder.clear();
for (int i = 1; i < vecLeftPosition.size(); i++) {
- string strOutcome;
+ std::string strOutcome;
fnGetOutcome(vecLeftPosition[i - 1], vecLeftPosition[i], strOutcome);
vecStrLeftReorder.push_back(strOutcome);
fnGetOutcome(vecRightPosition[i - 1], vecRightPosition[i], strOutcome);
@@ -115,32 +121,33 @@ struct SArgumentReorderModel {
static void fnGenerateFeature(const SParsedTree *pTree,
const SPredicate *pPred,
const SPredicateItem *pPredItem, int iPos,
- const string &strBlock1,
- const string &strBlock2, ostringstream &ostr) {
+ const std::string &strBlock1,
+ const std::string &strBlock2,
+ std::ostringstream &ostr) {
SSRLItem *pSRLItem1 = pPredItem->vec_items_[iPos - 1];
SSRLItem *pSRLItem2 = pPredItem->vec_items_[iPos];
const STreeItem *pCon1 = pSRLItem1->tree_item_;
const STreeItem *pCon2 = pSRLItem2->tree_item_;
- string left_role = pSRLItem1->role_;
- string right_role = pSRLItem2->role_;
+ std::string left_role = pSRLItem1->role_;
+ std::string right_role = pSRLItem2->role_;
- string predicate_term =
+ std::string predicate_term =
pTree->m_vecTerminals[pPred->m_iPosition]->m_pszTerm;
- vector<string> vec_other_right_sibling;
+ std::vector<std::string> vec_other_right_sibling;
for (int i = iPos + 1; i < pPredItem->vec_items_.size(); i++)
vec_other_right_sibling.push_back(
- string(pPredItem->vec_items_[i]->role_));
+ std::string(pPredItem->vec_items_[i]->role_));
if (vec_other_right_sibling.size() == 0)
- vec_other_right_sibling.push_back(string("NULL"));
+ vec_other_right_sibling.push_back(std::string("NULL"));
- vector<string> vec_other_left_sibling;
+ std::vector<std::string> vec_other_left_sibling;
for (int i = 0; i < iPos - 1; i++)
vec_other_right_sibling.push_back(
- string(pPredItem->vec_items_[i]->role_));
+ std::string(pPredItem->vec_items_[i]->role_));
if (vec_other_left_sibling.size() == 0)
- vec_other_left_sibling.push_back(string("NULL"));
+ vec_other_left_sibling.push_back(std::string("NULL"));
// generate features
// f1
@@ -190,26 +197,26 @@ struct SArgumentReorderModel {
}
private:
- static void fnGetOutcome(int i1, int i2, string &strOutcome) {
+ static void fnGetOutcome(int i1, int i2, std::string &strOutcome) {
assert(i1 != i2);
if (i1 < i2) {
if (i2 > i1 + 1)
- strOutcome = string("DM");
+ strOutcome = std::string("DM");
else
- strOutcome = string("M");
+ strOutcome = std::string("M");
} else {
if (i1 > i2 + 1)
- strOutcome = string("DS");
+ strOutcome = std::string("DS");
else
- strOutcome = string("S");
+ strOutcome = std::string("S");
}
}
- static void fnGetRelativePosition(const vector<int> &vecLeft,
- vector<int> &vecPosition) {
+ static void fnGetRelativePosition(const std::vector<int> &vecLeft,
+ std::vector<int> &vecPosition) {
vecPosition.clear();
- vector<float> vec;
+ std::vector<float> vec;
for (int i = 0; i < vecLeft.size(); i++) {
if (vecLeft[i] == -1) {
if (i == 0)
diff --git a/utils/constituent_reorder_model.cc b/utils/constituent_reorder_model.cc
index 042c751b..df75a1a0 100644
--- a/utils/constituent_reorder_model.cc
+++ b/utils/constituent_reorder_model.cc
@@ -73,15 +73,14 @@ inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
struct SConstReorderTrainer {
SConstReorderTrainer(
- const char* pszSynFname, // source-side flattened parse tree file name
- const char* pszAlignFname, // alignment filename
+ const char* pszSynFname, // source-side flattened parse tree file name
+ const char* pszAlignFname, // alignment filename
const char* pszSourceFname, // source file name
const char* pszTargetFname, // target file name
const char* pszInstanceFname, // training instance file name
- const char* pszModelPrefix, // classifier model file name prefix
- int iClassifierType, // classifier type
- int iCutoff, // feature count threshold
- const char* pszOption // other classifier parameters (for svmlight)
+ const char* pszModelPrefix, // classifier model file name prefix
+ int iCutoff, // feature count threshold
+ const char* /*pszOption*/ // other classifier parameters (for svmlight)
) {
fnGenerateInstanceFile(pszSynFname, pszAlignFname, pszSourceFname,
pszTargetFname, pszInstanceFname);
@@ -135,14 +134,14 @@ delete pZhangleMaxent;*/
}
inline void fnGetOutcome(int iL1, int iR1, int iL2, int iR2,
- const SAlignment* pAlign, string& strOutcome) {
+ const SAlignment* /*pAlign*/, string& strOutcome) {
if (iL1 == -1 && iL2 == -1)
strOutcome = "BU"; // 1. both are untranslated
else if (iL1 == -1)
strOutcome = "1U"; // 2. XP1 is untranslated
else if (iL2 == -1)
strOutcome = "2U"; // 3. XP2 is untranslated
- else if (iL1 == iL2 && iR2 == iR2)
+ else if (iL1 == iL2 && iR1 == iR2)
strOutcome = "SS"; // 4. Have same scope
else if (iL1 <= iL2 && iR1 >= iR2)
strOutcome = "1C2"; // 5. XP1's translation covers XP2's
@@ -241,7 +240,7 @@ delete pZhangleMaxent;*/
int iPos, const vector<string>& vecChunkStatus,
const vector<int>& vecPosition,
const vector<string>& vecSTerms,
- const vector<string>& vecTTerms, string& strOutcome,
+ const vector<string>& /*vecTTerms*/, string& strOutcome,
ostringstream& ostr) {
STreeItem* pCon1, *pCon2;
pCon1 = pParent->m_vecChildren[iPos - 1];
@@ -314,11 +313,11 @@ delete pZhangleMaxent;*/
* f8: the first and the last word of XP2's translation (f8_f, f8_l)
* f9: the translation of XP1's and XP2's head word (f9_1, f9_2)
*/
- void fnGenerateInstance(const SParsedTree* pTree, const STreeItem* pParent,
+ void fnGenerateInstance(const SParsedTree* /*pTree*/, const STreeItem* pParent,
const STreeItem* pCon1, const STreeItem* pCon2,
const SAlignment* pAlign,
const vector<string>& vecSTerms,
- const vector<string>& vecTTerms, string& strOutcome,
+ const vector<string>& /*vecTTerms*/, string& strOutcome,
ostringstream& ostr) {
int iLeft1, iRight1, iLeft2, iRight2;
@@ -401,8 +400,8 @@ delete pZhangleMaxent;*/
}
void fnGenerateInstanceFile(
- const char* pszSynFname, // source-side flattened parse tree file name
- const char* pszAlignFname, // alignment filename
+ const char* pszSynFname, // source-side flattened parse tree file name
+ const char* pszAlignFname, // alignment filename
const char* pszSourceFname, // source file name
const char* pszTargetFname, // target file name
const char* pszInstanceFname // training instance file name
@@ -507,8 +506,8 @@ delete pZhangleMaxent;*/
}
void fnGenerateInstanceFile2(
- const char* pszSynFname, // source-side flattened parse tree file name
- const char* pszAlignFname, // alignment filename
+ const char* pszSynFname, // source-side flattened parse tree file name
+ const char* pszAlignFname, // alignment filename
const char* pszSourceFname, // source file name
const char* pszTargetFname, // target file name
const char* pszInstanceFname // training instance file name
@@ -578,193 +577,6 @@ delete pZhangleMaxent;*/
}
};
-struct SConstContTrainer {
- SConstContTrainer(
- const char* pszFlattenedSynFname, // source-side flattened parse tree
- // file name
- const char* pszAlignFname, // alignment filename
- const char* pszSourceFname, // source file name
- const char* pszTargetFname, // target file name
- const char* pszInstanceFname, // training instance file name
- const char* pszModelPrefix, // classifier model file name prefix
- int iClassifierType, // classifier type
- int iCutoff, // feature count threshold
- const char* pszOption // other classifier parameters (for svmlight)
- ) {
- fnGenerateInstanceFile(pszFlattenedSynFname, pszAlignFname, pszSourceFname,
- pszTargetFname, pszInstanceFname);
- // fnTraining(pszInstanceFname, pszModelPrefix, iClassifierType, iCutoff,
- // pszOption);
- fnTraining(pszInstanceFname, pszModelPrefix, iCutoff);
- }
- ~SConstContTrainer() {}
-
- private:
- void fnTraining(const char* pszInstanceFname, const char* pszModelFname,
- int iCutoff) {
- char* pszNewInstanceFName = new char[strlen(pszInstanceFname) + 50];
- if (iCutoff > 0) {
- sprintf(pszNewInstanceFName, "%s.tmp", pszInstanceFname);
- fnPreparingTrainingdata(pszInstanceFname, iCutoff, pszNewInstanceFName);
- } else {
- strcpy(pszNewInstanceFName, pszInstanceFname);
- }
-
- /*Zhangle_Maxent *pZhangleMaxent = new Zhangle_Maxent(NULL);
- pZhangleMaxent->fnTrain(pszInstanceFname, "lbfgs", pszModelFname, 100,
- 2.0);
- delete pZhangleMaxent;*/
-
- Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL);
- pMaxent->fnTrain(pszInstanceFname, "l1", pszModelFname, 300);
- delete pMaxent;
-
- if (strcmp(pszNewInstanceFName, pszInstanceFname) != 0) {
- sprintf(pszNewInstanceFName, "rm %s.tmp", pszInstanceFname);
- system(pszNewInstanceFName);
- }
- delete[] pszNewInstanceFName;
- }
-
- void fnGetFocusedParentNodes(const SParsedTree* pTree,
- vector<STreeItem*>& vecFocused) {
- for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++) {
- STreeItem* pParent = pTree->m_vecTerminals[i]->m_ptParent;
-
- while (pParent != NULL) {
- // if (pParent->m_vecChildren.size() > 1 && pParent->m_iEnd -
- // pParent->m_iBegin > 5) {
- if (pParent->m_vecChildren.size() > 1) {
- // do constituent reordering for all children of pParent
- vecFocused.push_back(pParent);
- }
- if (pParent->m_iBrotherIndex != 0) break;
- pParent = pParent->m_ptParent;
- }
- }
- }
-
- inline void fnGetOutcome(int iL1, int iR1, const SAlignment* pAlign,
- string& strOutcome) {
- strOutcome = pAlign->fnIsContinuous(iL1, iR1);
- }
-
- inline string fnGetLengthType(int iLen) {
- if (iLen == 1) return string("1");
- if (iLen == 2) return string("2");
- if (iLen == 3) return string("3");
- if (iLen < 6) return string("4");
- if (iLen < 11) return string("6");
- return string("11");
- }
-
- /*
- * Source side (11 features):
- * f1: the syntactic category
- * f2: the syntactic category of its parent
- * f3: the head word's pos
- * f4: =1 if it's the head of its parent node
- * or
- * the head of its parent node
- * f5: length type
- */
- void fnGenerateInstance(const SParsedTree* pTree, const STreeItem* pCon1,
- const SAlignment* pAlign,
- const vector<string>& vecSTerms,
- const vector<string>& vecTTerms, string& strOutcome,
- ostringstream& ostr) {
-
- fnGetOutcome(pCon1->m_iBegin, pCon1->m_iEnd, pAlign, strOutcome);
-
- // generate features
- // f1
- ostr << "f1=" << pCon1->m_pszTerm;
- // f2
- ostr << " f2=" << pCon1->m_ptParent->m_pszTerm;
- // f3
- ostr << " f3=" << pTree->m_vecTerminals[pCon1->m_iHeadWord]
- ->m_ptParent->m_pszTerm;
- // f4
- if (pCon1->m_iBrotherIndex == pCon1->m_ptParent->m_iHeadChild) {
- ostr << " f4=1";
- } else {
- ostr << " f4="
- << pCon1->m_ptParent->m_vecChildren[pCon1->m_ptParent->m_iHeadChild]
- ->m_pszTerm;
- }
- // f5
- ostr << " f5=" << fnGetLengthType(pCon1->m_iEnd - pCon1->m_iBegin + 1);
- }
-
- void fnGenerateInstanceFile(
- const char* pszFlattenedSynFname, // source-side flattened parse tree
- // file name
- const char* pszAlignFname, // alignment filename
- const char* pszSourceFname, // source file name
- const char* pszTargetFname, // target file name
- const char* pszInstanceFname // training instance file name
- ) {
- SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
- SParseReader* pParseReader = new SParseReader(pszFlattenedSynFname, true);
- STxtFileReader* pTxtSReader = new STxtFileReader(pszSourceFname);
- STxtFileReader* pTxtTReader = new STxtFileReader(pszTargetFname);
-
- FILE* fpOut = fopen(pszInstanceFname, "w");
- assert(fpOut != NULL);
-
- // read sentence by sentence
- SAlignment* pAlign;
- SParsedTree* pTree;
- char* pszLine = new char[50001];
- int iSentNum = 0;
- while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
- pTree = pParseReader->fnReadNextParseTree();
- assert(pTree != NULL);
- assert(pTxtSReader->fnReadNextLine(pszLine, NULL));
- vector<string> vecSTerms;
- SplitOnWhitespace(string(pszLine), &vecSTerms);
- assert(pTxtTReader->fnReadNextLine(pszLine, NULL));
- vector<string> vecTTerms;
- SplitOnWhitespace(string(pszLine), &vecTTerms);
-
- vector<STreeItem*> vecFocused;
- fnGetFocusedParentNodes(pTree, vecFocused);
-
- for (size_t i = 0;
- i < vecFocused.size() && pTree->m_vecTerminals.size() > 10; i++) {
-
- STreeItem* pParent = vecFocused[i];
-
- for (size_t j = 0; j < pParent->m_vecChildren.size(); j++) {
- // children[j-1] vs. children[j] reordering
-
- string strOutcome;
- ostringstream ostr;
-
- fnGenerateInstance(pTree, pParent->m_vecChildren[j], pAlign,
- vecSTerms, vecTTerms, strOutcome, ostr);
-
- // fprintf(stderr, "%s %s\n", ostr.str().c_str(), strOutcome.c_str());
- fprintf(fpOut, "%s %s\n", ostr.str().c_str(), strOutcome.c_str());
- }
- }
-
- delete pAlign;
- delete pTree;
- iSentNum++;
-
- if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
- }
-
- fclose(fpOut);
- delete pAlignReader;
- delete pParseReader;
- delete pTxtSReader;
- delete pTxtTReader;
- delete[] pszLine;
- }
-};
-
inline void print_options(std::ostream& out,
po::options_description const& opts) {
typedef std::vector<boost::shared_ptr<po::option_description> > Ds;
@@ -781,12 +593,11 @@ inline string str(char const* name, po::variables_map const& conf) {
}
//--parse_file /scratch0/mt_exp/gq-ctb/data/train.srl.cn --align_file
-///scratch0/mt_exp/gq-ctb/data/aligned.grow-diag-final-and --source_file
-///scratch0/mt_exp/gq-ctb/data/train.cn --target_file
-///scratch0/mt_exp/gq-ctb/data/train.en --instance_file
-///scratch0/mt_exp/gq-ctb/data/srl-instance --model_prefix
-///scratch0/mt_exp/gq-ctb/data/srl-instance --feature_cutoff 10
-//--classifier_type 1
+/// scratch0/mt_exp/gq-ctb/data/aligned.grow-diag-final-and --source_file
+/// scratch0/mt_exp/gq-ctb/data/train.cn --target_file
+/// scratch0/mt_exp/gq-ctb/data/train.en --instance_file
+/// scratch0/mt_exp/gq-ctb/data/srl-instance --model_prefix
+/// scratch0/mt_exp/gq-ctb/data/srl-instance --feature_cutoff 10
int main(int argc, char** argv) {
po::options_description opts("Configuration options");
@@ -798,11 +609,9 @@ int main(int argc, char** argv) {
"instance_file", po::value<string>(), "Instance file path (output)")(
"model_prefix", po::value<string>(),
"Model file path prefix (output): three files will be generated")(
- "classifier_type", po::value<int>()->default_value(1),
- "Classifier type: 1 for openNLP maxent; 2 for Zhangle maxent; and 3 for "
- "SVMLight")("feature_cutoff", po::value<int>()->default_value(100),
- "Feature cutoff threshold")(
- "svm_option", po::value<string>(), "Parameters for SVMLight classifier")(
+ "feature_cutoff", po::value<int>()->default_value(100),
+ "Feature cutoff threshold")("svm_option", po::value<string>(),
+ "Parameters for SVMLight classifier")(
"help", "produce help message");
po::variables_map vm;
@@ -839,8 +648,8 @@ int main(int argc, char** argv) {
str("parse_file", vm).c_str(), str("align_file", vm).c_str(),
str("source_file", vm).c_str(), str("target_file", vm).c_str(),
str("instance_file", vm).c_str(), str("model_prefix", vm).c_str(),
- vm["classifier_type"].as<int>(), vm["feature_cutoff"].as<int>(), pOption);
+ vm["feature_cutoff"].as<int>(), pOption);
delete pTrainer;
- return 1;
+ return 0;
}
diff --git a/utils/lbfgs.h b/utils/lbfgs.h
index ed5cd944..4d706f7a 100644
--- a/utils/lbfgs.h
+++ b/utils/lbfgs.h
@@ -1,6 +1,8 @@
#ifndef _LBFGS_H_
#define _LBFGS_H_
+#include <vector>
+
// template<class FuncGrad>
// std::vector<double>
// perform_LBFGS(FuncGrad func_grad, const std::vector<double> & x0);
@@ -13,7 +15,6 @@ std::vector<double> perform_OWLQN(
double (*func_grad)(const std::vector<double> &, std::vector<double> &),
const std::vector<double> &x0, const double C);
-// const int LBFGS_M = 7;
const int LBFGS_M = 10;
#endif
diff --git a/utils/maxent.cpp b/utils/maxent.cpp
index 9115f6f2..0f49ee9d 100644
--- a/utils/maxent.cpp
+++ b/utils/maxent.cpp
@@ -142,7 +142,7 @@ int ME_Model::make_feature_bag(const int cutoff) {
// count the occurrences of features
#ifdef USE_HASH_MAP
- typedef __gnu_cxx::hash_map<unsigned int, int> map_type;
+ typedef std::unordered_map<unsigned int, int> map_type;
#else
typedef std::map<unsigned int, int> map_type;
#endif
diff --git a/utils/srl_sentence.h b/utils/srl_sentence.h
index 61532fb2..9d509600 100644
--- a/utils/srl_sentence.h
+++ b/utils/srl_sentence.h
@@ -8,15 +8,12 @@
#ifndef SRL_SENTENCE_H_
#define SRL_SENTENCE_H_
-
#include <sstream>
#include <vector>
#include "tree.h"
#include "stringlib.h"
-using namespace std;
-
struct SArgument {
SArgument(const char* pszRole, int iBegin, int iEnd, float fProb) {
m_pszRole = new char[strlen(pszRole) + 1];
@@ -38,7 +35,7 @@ struct SArgument {
char* m_pszRole; // argument rule, e.g., ARG0, ARGM-TMP
int m_iBegin;
- int m_iEnd; // the span of the argument, [m_iBegin, m_iEnd]
+ int m_iEnd; // the span of the argument, [m_iBegin, m_iEnd]
float m_fProb; // the probability of this role,
STreeItem* m_pTreeItem;
};
@@ -68,8 +65,8 @@ struct SPredicate {
char* m_pszLemma; // lemma of the predicate, for Chinese, it's always as same
// as the predicate itself
- int m_iPosition; // the position in sentence
- vector<SArgument*> m_vecArgt; // arguments associated to the predicate
+ int m_iPosition; // the position in sentence
+ std::vector<SArgument*> m_vecArgt; // arguments associated to the predicate
};
struct SSrlSentence {
@@ -91,7 +88,7 @@ struct SSrlSentence {
int GetPredicateNum() { return m_vecPred.size(); }
SParsedTree* m_pTree;
- vector<SPredicate*> m_vecPred;
+ std::vector<SPredicate*> m_vecPred;
};
struct SSrlSentenceReader {
@@ -116,7 +113,7 @@ struct SSrlSentenceReader {
// TODO: here only considers flat predicate-argument structure
// i.e., no overlap among them
SSrlSentence* fnReadNextSrlSentence() {
- vector<vector<string> > vecContent;
+ std::vector<std::vector<std::string> > vecContent;
if (fnReadNextContent(vecContent) == false) return NULL;
SSrlSentence* pSrlSentence = new SSrlSentence();
@@ -124,18 +121,18 @@ struct SSrlSentenceReader {
// put together syntactic text
std::ostringstream ostr;
for (int i = 0; i < iSize; i++) {
- string strSynSeg =
+ std::string strSynSeg =
vecContent[i][5]; // the 5th column is the syntactic segment
size_t iPosition = strSynSeg.find_first_of('*');
- assert(iPosition != string::npos);
- ostringstream ostrTmp;
+ assert(iPosition != std::string::npos);
+ std::ostringstream ostrTmp;
ostrTmp << "(" << vecContent[i][2] << " " << vecContent[i][0]
<< ")"; // the 2th column is POS-tag, and the 0th column is word
strSynSeg.replace(iPosition, 1, ostrTmp.str());
fnReplaceAll(strSynSeg, "(", " (");
ostr << strSynSeg;
}
- string strSyn = ostr.str();
+ std::string strSyn = ostr.str();
pSrlSentence->m_pTree = SParsedTree::fnConvertFromString(strSyn.c_str());
pSrlSentence->m_pTree->fnSetHeadWord();
pSrlSentence->m_pTree->fnSetSpanInfo();
@@ -143,9 +140,9 @@ struct SSrlSentenceReader {
// read predicate-argument structure
int iNumPred = vecContent[0].size() - 8;
for (int i = 0; i < iNumPred; i++) {
- vector<string> vecRole;
- vector<int> vecBegin;
- vector<int> vecEnd;
+ std::vector<std::string> vecRole;
+ std::vector<int> vecBegin;
+ std::vector<int> vecEnd;
int iPred = -1;
for (int j = 0; j < iSize; j++) {
const char* p = vecContent[j][i + 8].c_str();
@@ -184,7 +181,7 @@ struct SSrlSentenceReader {
}
private:
- bool fnReadNextContent(vector<vector<string> >& vecContent) {
+ bool fnReadNextContent(std::vector<std::vector<std::string> >& vecContent) {
vecContent.clear();
if (feof(m_fpIn) == true) return false;
char* pszLine;
@@ -200,7 +197,7 @@ struct SSrlSentenceReader {
}
if (iLen == 0) break; // end of this sentence
- vector<string> terms = SplitOnWhitespace(string(pszLine));
+ std::vector<std::string> terms = SplitOnWhitespace(std::string(pszLine));
assert(terms.size() > 7);
vecContent.push_back(terms);
}
diff --git a/utils/synutils.h b/utils/synutils.h
index ef7b78b7..f611553e 100644
--- a/utils/synutils.h
+++ b/utils/synutils.h
@@ -17,21 +17,17 @@
#include <string>
#include <unordered_map>
-using namespace std;
-
typedef std::unordered_map<std::string, int> MapString2Int;
typedef std::unordered_map<std::string, float> MapString2Float;
typedef std::unordered_map<std::string, float>::iterator
MapString2FloatIterator;
-using namespace std;
-
struct SFReader {
SFReader() {}
virtual ~SFReader() {}
virtual bool fnReadNextLine(char* pszLine, int* piLength) = 0;
- virtual bool fnReadNextLine(string& strLine) = 0;
+ virtual bool fnReadNextLine(std::string& strLine) = 0;
};
struct STxtFileReader : public SFReader {
@@ -63,13 +59,13 @@ struct STxtFileReader : public SFReader {
return true;
}
- bool fnReadNextLine(string& strLine) {
+ bool fnReadNextLine(std::string& strLine) {
char* pszLine = new char[10001];
bool bOut = fnReadNextLine(pszLine, NULL);
if (bOut)
- strLine = string(pszLine);
+ strLine = std::string(pszLine);
else
- strLine = string("");
+ strLine = std::string("");
delete[] pszLine;
return bOut;
@@ -108,13 +104,13 @@ struct SGZFileReader : public SFReader {
return true;
}
- bool fnReadNextLine(string& strLine) {
+ bool fnReadNextLine(std::string& strLine) {
char* pszLine = new char[10001];
bool bOut = fnReadNextLine(pszLine, NULL);
if (bOut)
- strLine = string(pszLine);
+ strLine = std::string(pszLine);
else
- strLine = string("");
+ strLine = std::string("");
delete[] pszLine;
return bOut;
diff --git a/utils/tree.h b/utils/tree.h
index 8070f828..6c3406d6 100644
--- a/utils/tree.h
+++ b/utils/tree.h
@@ -14,8 +14,6 @@
#include <string>
#include <vector>
-using namespace std;
-
struct STreeItem {
STreeItem(const char *pszTerm) {
m_pszTerm = new char[strlen(pszTerm) + 1];
@@ -53,18 +51,18 @@ struct STreeItem {
public:
char *m_pszTerm;
- vector<STreeItem *> m_vecChildren; // children items
- STreeItem *m_ptParent; // the parent item
+ std::vector<STreeItem *> m_vecChildren; // children items
+ STreeItem *m_ptParent; // the parent item
int m_iBegin;
- int m_iEnd; // the node span words[m_iBegin, m_iEnd]
- int m_iHeadChild; // the index of its head child
- int m_iHeadWord; // the index of its head word
+ int m_iEnd; // the node span words[m_iBegin, m_iEnd]
+ int m_iHeadChild; // the index of its head child
+ int m_iHeadWord; // the index of its head word
int m_iBrotherIndex; // the index in his brothers
};
struct SGetHeadWord {
- typedef vector<string> CVectorStr;
+ typedef std::vector<std::string> CVectorStr;
SGetHeadWord() {}
~SGetHeadWord() {}
int fnGetHeadWord(char *pszCFGLeft, CVectorStr vectRight) {
@@ -311,7 +309,7 @@ struct SParsedTree {
if (strcmp(pszStr, "(())") == 0) return NULL;
SParsedTree *pTree = new SParsedTree();
- vector<string> vecSyn;
+ std::vector<std::string> vecSyn;
fnReadSyntactic(pszStr, vecSyn);
int iLeft = 1, iRight = 1; //# left/right parenthesis
@@ -418,13 +416,13 @@ struct SParsedTree {
for (I = 0; I < ptItem->m_vecChildren.size(); I++)
fnSuffixTraverseSetHeadWord(ptItem->m_vecChildren[I], pGetHeadWord);
- vector<string> vecRight;
+ std::vector<std::string> vecRight;
if (ptItem->m_vecChildren.size() == 1)
iHeadchild = 0;
else {
for (I = 0; I < ptItem->m_vecChildren.size(); I++)
- vecRight.push_back(string(ptItem->m_vecChildren[I]->m_pszTerm));
+ vecRight.push_back(std::string(ptItem->m_vecChildren[I]->m_pszTerm));
iHeadchild = pGetHeadWord->fnGetHeadWord(ptItem->m_pszTerm, vecRight);
}
@@ -433,7 +431,8 @@ struct SParsedTree {
ptItem->m_iHeadWord = ptItem->m_vecChildren[iHeadchild]->m_iHeadWord;
}
- static void fnReadSyntactic(const char *pszSyn, vector<string> &vec) {
+ static void fnReadSyntactic(const char *pszSyn,
+ std::vector<std::string> &vec) {
char *p;
int I;
@@ -481,29 +480,29 @@ struct SParsedTree {
if ((pszTerm[0] == '(') || (pszTerm[strlen(pszTerm) - 1] == ')')) {
if (pszTerm[0] == '(') {
- vec.push_back(string("("));
+ vec.push_back(std::string("("));
iLeftNum++;
I = 1;
while (pszTerm[I] == '(' && pszTerm[I] != '\0') {
- vec.push_back(string("("));
+ vec.push_back(std::string("("));
iLeftNum++;
I++;
}
- if (strlen(pszTerm) > 1) vec.push_back(string(pszTerm + I));
+ if (strlen(pszTerm) > 1) vec.push_back(std::string(pszTerm + I));
} else {
char *pTmp;
pTmp = pszTerm + strlen(pszTerm) - 1;
while ((pTmp[0] == ')') && (pTmp >= pszTerm)) pTmp--;
pTmp[1] = '\0';
- if (strlen(pszTerm) > 0) vec.push_back(string(pszTerm));
+ if (strlen(pszTerm) > 0) vec.push_back(std::string(pszTerm));
pTmp += 2;
for (I = 0; I <= (int)strlen(pTmp); I++) {
- vec.push_back(string(")"));
+ vec.push_back(std::string(")"));
iRightNum++;
}
}
@@ -512,26 +511,26 @@ struct SParsedTree {
q = strchr(pszTerm, ')');
if (q != NULL) {
q[0] = '\0';
- if (pszTerm[0] != '\0') vec.push_back(string(pszTerm));
- vec.push_back(string(")"));
+ if (pszTerm[0] != '\0') vec.push_back(std::string(pszTerm));
+ vec.push_back(std::string(")"));
iRightNum++;
q++;
while (q[0] == ')') {
- vec.push_back(string(")"));
+ vec.push_back(std::string(")"));
q++;
iRightNum++;
}
while (q[0] == '(') {
- vec.push_back(string("("));
+ vec.push_back(std::string("("));
q++;
iLeftNum++;
}
- if (q[0] != '\0') vec.push_back(string(q));
+ if (q[0] != '\0') vec.push_back(std::string(q));
} else
- vec.push_back(string(pszTerm));
+ vec.push_back(std::string(pszTerm));
}
}
@@ -547,10 +546,10 @@ struct SParsedTree {
if (vec.size() >= 2 && strcmp(vec[1].c_str(), "(") == 0) {
//( (IP..) )
- std::vector<string>::iterator it;
+ std::vector<std::string>::iterator it;
it = vec.begin();
it++;
- vec.insert(it, string("ROOT"));
+ vec.insert(it, std::string("ROOT"));
}
break;
@@ -563,7 +562,7 @@ struct SParsedTree {
public:
STreeItem *m_ptRoot;
- vector<STreeItem *> m_vecTerminals; // the leaf nodes
+ std::vector<STreeItem *> m_vecTerminals; // the leaf nodes
};
struct SParseReader {
@@ -645,7 +644,7 @@ struct SParseReader {
for (size_t i = 0; i < pTreeItem->m_vecChildren.size(); i++)
fnSuffixTraverseSetHeadWord(pTreeItem->m_vecChildren[i]);
- vector<string> vecRight;
+ std::vector<std::string> vecRight;
int iHeadchild;
@@ -658,7 +657,7 @@ struct SParseReader {
if (p[0] == '*' && p[strlen(p) - 1] == '*') {
iHeadchild = i;
p[strlen(p) - 1] = '\0';
- string str = p + 1;
+ std::string str = p + 1;
strcpy(p, str.c_str()); // erase the "*..*"
break;
}
diff --git a/utils/tsuruoka_maxent.h b/utils/tsuruoka_maxent.h
index e6bef232..550a4b7f 100644
--- a/utils/tsuruoka_maxent.h
+++ b/utils/tsuruoka_maxent.h
@@ -6,17 +6,16 @@
#ifndef TSURUOKA_MAXENT_H_
#define TSURUOKA_MAXENT_H_
-#include "synutils.h"
-#include "stringlib.h"
-#include "maxent.h"
-
#include <assert.h>
-#include <vector>
-#include <string>
#include <string.h>
+#include <string>
#include <unordered_map>
+#include <utility>
+#include <vector>
-using namespace std;
+#include "synutils.h"
+#include "stringlib.h"
+#include "maxent.h"
typedef std::unordered_map<std::string, int> Map;
typedef std::unordered_map<std::string, int>::iterator Iterator;
@@ -35,7 +34,7 @@ struct Tsuruoka_Maxent {
}
void fnTrain(const char* pszInstanceFName, const char* pszAlgorithm,
- const char* pszModelFName, int iNumIteration) {
+ const char* pszModelFName, int /*iNumIteration*/) {
assert(strcmp(pszAlgorithm, "l1") == 0 || strcmp(pszAlgorithm, "l2") == 0 ||
strcmp(pszAlgorithm, "sgd") == 0 ||
strcmp(pszAlgorithm, "SGD") == 0);
@@ -67,10 +66,10 @@ struct Tsuruoka_Maxent {
assert(p != NULL);
p[0] = '\0';
p++;
- vector<string> vecContext;
- SplitOnWhitespace(string(pszLine), &vecContext);
+ std::vector<std::string> vecContext;
+ SplitOnWhitespace(std::string(pszLine), &vecContext);
- pmes->label = string(p);
+ pmes->label = std::string(p);
for (size_t i = 0; i < vecContext.size(); i++)
pmes->add_feature(vecContext[i]);
pModel->add_training_sample((*pmes));
@@ -98,53 +97,53 @@ struct Tsuruoka_Maxent {
}
double fnEval(const char* pszContext, const char* pszOutcome) const {
- vector<string> vecContext;
+ std::vector<std::string> vecContext;
ME_Sample* pmes = new ME_Sample();
- SplitOnWhitespace(string(pszContext), &vecContext);
+ SplitOnWhitespace(std::string(pszContext), &vecContext);
for (size_t i = 0; i < vecContext.size(); i++)
pmes->add_feature(vecContext[i]);
- vector<double> vecProb = m_pModel->classify(*pmes);
+ std::vector<double> vecProb = m_pModel->classify(*pmes);
delete pmes;
int iLableID = m_pModel->get_class_id(pszOutcome);
return vecProb[iLableID];
}
void fnEval(const char* pszContext,
- vector<pair<string, double> >& vecOutput) const {
- vector<string> vecContext;
+ std::vector<std::pair<std::string, double> >& vecOutput) const {
+ std::vector<std::string> vecContext;
ME_Sample* pmes = new ME_Sample();
- SplitOnWhitespace(string(pszContext), &vecContext);
+ SplitOnWhitespace(std::string(pszContext), &vecContext);
vecOutput.clear();
for (size_t i = 0; i < vecContext.size(); i++)
pmes->add_feature(vecContext[i]);
- vector<double> vecProb = m_pModel->classify(*pmes);
+ std::vector<double> vecProb = m_pModel->classify(*pmes);
for (size_t i = 0; i < vecProb.size(); i++) {
- string label = m_pModel->get_class_label(i);
+ std::string label = m_pModel->get_class_label(i);
vecOutput.push_back(make_pair(label, vecProb[i]));
}
delete pmes;
}
- void fnEval(const char* pszContext, vector<double>& vecOutput) const {
- vector<string> vecContext;
+ void fnEval(const char* pszContext, std::vector<double>& vecOutput) const {
+ std::vector<std::string> vecContext;
ME_Sample* pmes = new ME_Sample();
- SplitOnWhitespace(string(pszContext), &vecContext);
+ SplitOnWhitespace(std::string(pszContext), &vecContext);
vecOutput.clear();
for (size_t i = 0; i < vecContext.size(); i++)
pmes->add_feature(vecContext[i]);
- vector<double> vecProb = m_pModel->classify(*pmes);
+ std::vector<double> vecProb = m_pModel->classify(*pmes);
for (size_t i = 0; i < vecProb.size(); i++) {
- string label = m_pModel->get_class_label(i);
+ std::string label = m_pModel->get_class_label(i);
vecOutput.push_back(vecProb[i]);
}
delete pmes;
}
- int fnGetClassId(const string& strLabel) const {
+ int fnGetClassId(const std::string& strLabel) const {
return m_pModel->get_class_id(strLabel);
}