diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200 |
commit | f1ba05780db1705493d9afb562332498b93d26f1 (patch) | |
tree | fb429a657ba97f33e8140742de9bc74d9fc88e75 /dpmert | |
parent | aadabfdf37dfd451485277cb77fad02f77b361c6 (diff) | |
parent | 317d650f6cb1e24ac6f3be6f7bf9d4246a59e0e5 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'dpmert')
-rw-r--r-- | dpmert/Jamfile | 32 | ||||
-rw-r--r-- | dpmert/ces.cc | 2 | ||||
-rwxr-xr-x | dpmert/divide_refs.py | 15 | ||||
-rw-r--r-- | dpmert/lo_test.cc | 10 | ||||
-rw-r--r-- | dpmert/mr_dpmert_generate_mapper_input.cc | 7 | ||||
-rw-r--r-- | dpmert/mr_dpmert_map.cc | 2 |
6 files changed, 60 insertions, 8 deletions
diff --git a/dpmert/Jamfile b/dpmert/Jamfile new file mode 100644 index 00000000..bc4b079b --- /dev/null +++ b/dpmert/Jamfile @@ -0,0 +1,32 @@ +import testing ; +import lex ; +import option ; + +lib dpmert : + ces.cc + error_surface.cc + line_optimizer.cc + mert_geometry.cc + ..//utils + ..//mteval + ..//decoder + ../klm/lm//kenlm + ..//boost_program_options + : <include>. + : : + <library>..//utils + <library>..//mteval + <library>../klm/lm//kenlm + <library>..//boost_program_options + <include>. + ; + +all_tests [ glob *_test.cc ] : dpmert : <testing.arg>$(TOP)/dpmert/test_data ; + +exe sentserver : sentserver.c : <threading>multi ; +exe sentclient : sentclient.c ; +exe mr_dpmert_generate_mapper_input : mr_dpmert_generate_mapper_input.cc dpmert ..//boost_program_options ; +exe mr_dpmert_map : mr_dpmert_map.cc dpmert ..//boost_program_options ; +exe mr_dpmert_reduce : mr_dpmert_reduce.cc dpmert ..//boost_program_options ; + +alias programs : sentserver sentclient mr_dpmert_generate_mapper_input mr_dpmert_map mr_dpmert_reduce ; diff --git a/dpmert/ces.cc b/dpmert/ces.cc index c6cb1cdf..157b2d17 100644 --- a/dpmert/ces.cc +++ b/dpmert/ces.cc @@ -25,7 +25,7 @@ void ComputeErrorSurface(const SegmentEvaluator& ss, env->resize(ienv.size()); SufficientStats prev_score; // defaults to 0 int j = 0; - for (int i = 0; i < ienv.size(); ++i) { + for (unsigned i = 0; i < ienv.size(); ++i) { const MERTPoint& seg = *ienv[i]; vector<WordID> trans; #if 0 diff --git a/dpmert/divide_refs.py b/dpmert/divide_refs.py new file mode 100755 index 00000000..b478f918 --- /dev/null +++ b/dpmert/divide_refs.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +import sys + +(numRefs, outPrefix) = sys.argv[1:] +numRefs = int(numRefs) + +outs = [open(outPrefix+str(i), "w") for i in range(numRefs)] + +i = 0 +for line in sys.stdin: + outs[i].write(line) + i = (i + 1) % numRefs + +for out in outs: + out.close() diff --git a/dpmert/lo_test.cc b/dpmert/lo_test.cc index d47a95b5..2daf87bb 100644 --- a/dpmert/lo_test.cc +++ b/dpmert/lo_test.cc @@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE(TestConvexHullInside) { if (!d) break; cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; } - for (int i = 0; i < segs.size(); ++i) { + for (unsigned i = 0; i < segs.size(); ++i) { cerr << "seg=" << i << endl; vector<WordID> trans; segs[i]->ConstructTranslation(&trans); @@ -118,13 +118,15 @@ BOOST_AUTO_TEST_CASE( TestS1) { to_optimize.push_back(fPhraseModel_1); to_optimize.push_back(fPhraseModel_2); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + Hypergraph hg; - ReadFile rf("./test_data/0.json.gz"); + ReadFile rf(path + "/0.json.gz"); HypergraphIO::ReadFromJSON(rf.stream(), &hg); hg.Reweight(wts); Hypergraph hg2; - ReadFile rf2("./test_data/1.json.gz"); + ReadFile rf2(path + "/1.json.gz"); HypergraphIO::ReadFromJSON(rf2.stream(), &hg2); hg2.Reweight(wts); @@ -149,7 +151,7 @@ BOOST_AUTO_TEST_CASE( TestS1) { &rng, &axes); assert(axes.size() == 10 + to_optimize.size()); - for (int i = 0; i < axes.size(); ++i) + for (unsigned i = 0; i < axes.size(); ++i) cerr << axes[i] << endl; const SparseVector<double>& axis = axes[0]; diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc index 59d4f24f..199cd23a 100644 --- a/dpmert/mr_dpmert_generate_mapper_input.cc +++ b/dpmert/mr_dpmert_generate_mapper_input.cc @@ -52,12 +52,15 @@ int main(int argc, char** argv) { Weights::InitFromFile(conf["weights"].as<string>(), &w, &features); Weights::InitSparseVector(w, &origin); const string forest_repository = conf["forest_repository"].as<string>(); - assert(DirectoryExists(forest_repository)); + if (!DirectoryExists(forest_repository)) { + cerr << "Forest repository directory " << forest_repository << " not found!\n"; + return 1; + } if (conf.count("optimize_feature") > 0) features=conf["optimize_feature"].as<vector<string> >(); vector<SparseVector<weight_t> > directions; vector<int> fids(features.size()); - for (int i = 0; i < features.size(); ++i) + for (unsigned i = 0; i < features.size(); ++i) fids[i] = FD::Convert(features[i]); LineOptimizer::CreateOptimizationDirections( fids, diff --git a/dpmert/mr_dpmert_map.cc b/dpmert/mr_dpmert_map.cc index f3304f0f..d1efcf96 100644 --- a/dpmert/mr_dpmert_map.cc +++ b/dpmert/mr_dpmert_map.cc @@ -52,7 +52,7 @@ bool ReadSparseVectorString(const string& s, SparseVector<double>* v) { vector<string> fields; Tokenize(s, ';', &fields); if (fields.empty()) return false; - for (int i = 0; i < fields.size(); ++i) { + for (unsigned i = 0; i < fields.size(); ++i) { vector<string> pair(2); Tokenize(fields[i], '=', &pair); if (pair.size() != 2) { |