summary | refs | log | tree | commit | diff
path: root/dpmert
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-05-31 13:57:24 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-05-31 13:57:24 +0200
commit f1ba05780db1705493d9afb562332498b93d26f1 (patch)
tree fb429a657ba97f33e8140742de9bc74d9fc88e75 /dpmert
parent aadabfdf37dfd451485277cb77fad02f77b361c6 (diff)
parent 317d650f6cb1e24ac6f3be6f7bf9d4246a59e0e5 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'dpmert')
-rw-r--r-- dpmert/Jamfile 32
-rw-r--r-- dpmert/ces.cc 2
-rwxr-xr-x dpmert/divide_refs.py 15
-rw-r--r-- dpmert/lo_test.cc 10
-rw-r--r-- dpmert/mr_dpmert_generate_mapper_input.cc 7
-rw-r--r-- dpmert/mr_dpmert_map.cc 2
6 files changed, 60 insertions, 8 deletions
diff --git a/dpmert/Jamfile b/dpmert/Jamfile
new file mode 100644
index 00000000..bc4b079b
--- /dev/null
+++ b/dpmert/Jamfile
@@ -0,0 +1,32 @@
+import testing ;
+import lex ;
+import option ;
+
+lib dpmert :
+ ces.cc
+ error_surface.cc
+ line_optimizer.cc
+ mert_geometry.cc
+ ..//utils
+ ..//mteval
+ ..//decoder
+ ../klm/lm//kenlm
+ ..//boost_program_options
+ : <include>.
+ : :
+ <library>..//utils
+ <library>..//mteval
+ <library>../klm/lm//kenlm
+ <library>..//boost_program_options
+ <include>.
+ ;
+
+all_tests [ glob *_test.cc ] : dpmert : <testing.arg>$(TOP)/dpmert/test_data ;
+
+exe sentserver : sentserver.c : <threading>multi ;
+exe sentclient : sentclient.c ;
+exe mr_dpmert_generate_mapper_input : mr_dpmert_generate_mapper_input.cc dpmert ..//boost_program_options ;
+exe mr_dpmert_map : mr_dpmert_map.cc dpmert ..//boost_program_options ;
+exe mr_dpmert_reduce : mr_dpmert_reduce.cc dpmert ..//boost_program_options ;
+
+alias programs : sentserver sentclient mr_dpmert_generate_mapper_input mr_dpmert_map mr_dpmert_reduce ;
diff --git a/dpmert/ces.cc b/dpmert/ces.cc
index c6cb1cdf..157b2d17 100644
--- a/dpmert/ces.cc
+++ b/dpmert/ces.cc
@@ -25,7 +25,7 @@ void ComputeErrorSurface(const SegmentEvaluator& ss,
env->resize(ienv.size());
SufficientStats prev_score; // defaults to 0
int j = 0;
- for (int i = 0; i < ienv.size(); ++i) {
+ for (unsigned i = 0; i < ienv.size(); ++i) {
const MERTPoint& seg = *ienv[i];
vector<WordID> trans;
#if 0
diff --git a/dpmert/divide_refs.py b/dpmert/divide_refs.py
new file mode 100755
index 00000000..b478f918
--- /dev/null
+++ b/dpmert/divide_refs.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+import sys
+
+(numRefs, outPrefix) = sys.argv[1:]
+numRefs = int(numRefs)
+
+outs = [open(outPrefix+str(i), "w") for i in range(numRefs)]
+
+i = 0
+for line in sys.stdin:
+ outs[i].write(line)
+ i = (i + 1) % numRefs
+
+for out in outs:
+ out.close()
diff --git a/dpmert/lo_test.cc b/dpmert/lo_test.cc
index d47a95b5..2daf87bb 100644
--- a/dpmert/lo_test.cc
+++ b/dpmert/lo_test.cc
@@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE(TestConvexHullInside) {
if (!d) break;
cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
}
- for (int i = 0; i < segs.size(); ++i) {
+ for (unsigned i = 0; i < segs.size(); ++i) {
cerr << "seg=" << i << endl;
vector<WordID> trans;
segs[i]->ConstructTranslation(&trans);
@@ -118,13 +118,15 @@ BOOST_AUTO_TEST_CASE( TestS1) {
to_optimize.push_back(fPhraseModel_1);
to_optimize.push_back(fPhraseModel_2);
+ std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data");
+
Hypergraph hg;
- ReadFile rf("./test_data/0.json.gz");
+ ReadFile rf(path + "/0.json.gz");
HypergraphIO::ReadFromJSON(rf.stream(), &hg);
hg.Reweight(wts);
Hypergraph hg2;
- ReadFile rf2("./test_data/1.json.gz");
+ ReadFile rf2(path + "/1.json.gz");
HypergraphIO::ReadFromJSON(rf2.stream(), &hg2);
hg2.Reweight(wts);
@@ -149,7 +151,7 @@ BOOST_AUTO_TEST_CASE( TestS1) {
&rng,
&axes);
assert(axes.size() == 10 + to_optimize.size());
- for (int i = 0; i < axes.size(); ++i)
+ for (unsigned i = 0; i < axes.size(); ++i)
cerr << axes[i] << endl;
const SparseVector<double>& axis = axes[0];
diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc
index 59d4f24f..199cd23a 100644
--- a/dpmert/mr_dpmert_generate_mapper_input.cc
+++ b/dpmert/mr_dpmert_generate_mapper_input.cc
@@ -52,12 +52,15 @@ int main(int argc, char** argv) {
Weights::InitFromFile(conf["weights"].as<string>(), &w, &features);
Weights::InitSparseVector(w, &origin);
const string forest_repository = conf["forest_repository"].as<string>();
- assert(DirectoryExists(forest_repository));
+ if (!DirectoryExists(forest_repository)) {
+ cerr << "Forest repository directory " << forest_repository << " not found!\n";
+ return 1;
+ }
if (conf.count("optimize_feature") > 0)
features=conf["optimize_feature"].as<vector<string> >();
vector<SparseVector<weight_t> > directions;
vector<int> fids(features.size());
- for (int i = 0; i < features.size(); ++i)
+ for (unsigned i = 0; i < features.size(); ++i)
fids[i] = FD::Convert(features[i]);
LineOptimizer::CreateOptimizationDirections(
fids,
diff --git a/dpmert/mr_dpmert_map.cc b/dpmert/mr_dpmert_map.cc
index f3304f0f..d1efcf96 100644
--- a/dpmert/mr_dpmert_map.cc
+++ b/dpmert/mr_dpmert_map.cc
@@ -52,7 +52,7 @@ bool ReadSparseVectorString(const string& s, SparseVector<double>* v) {
vector<string> fields;
Tokenize(s, ';', &fields);
if (fields.empty()) return false;
- for (int i = 0; i < fields.size(); ++i) {
+ for (unsigned i = 0; i < fields.size(); ++i) {
vector<string> pair(2);
Tokenize(fields[i], '=', &pair);
if (pair.size() != 2) {