summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-01-27 14:49:08 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2012-01-27 14:49:08 -0500
commit89b662b51373f0f466d62a65d3f0a164d1d31b1c (patch)
treeefff9d60e5c128c054f848fa802e9f077cad54b1
parent3d17bf9ae1ba67cd091794839d4d5f4c393a0e2c (diff)
rename vest to dpmert (dynamic programming mert), rename variables and types to correspond to standard geometric concepts
-rw-r--r--Makefile.am2
-rw-r--r--configure.ac2
-rw-r--r--dpmert/Makefile.am35
-rw-r--r--dpmert/README.shared-mem (renamed from vest/README.shared-mem)0
-rwxr-xr-xdpmert/cat.pl (renamed from vest/cat.pl)0
-rw-r--r--dpmert/ces.cc (renamed from vest/ces.cc)8
-rw-r--r--dpmert/ces.h (renamed from vest/ces.h)4
-rwxr-xr-xdpmert/dpmert.pl (renamed from vest/dist-vest.pl)14
-rw-r--r--dpmert/error_surface.cc (renamed from vest/error_surface.cc)0
-rw-r--r--dpmert/error_surface.h (renamed from vest/error_surface.h)0
-rw-r--r--dpmert/libcall.pl (renamed from vest/libcall.pl)0
-rwxr-xr-xdpmert/line_mediator.pl (renamed from vest/line_mediator.pl)0
-rw-r--r--dpmert/line_optimizer.cc (renamed from vest/line_optimizer.cc)0
-rw-r--r--dpmert/line_optimizer.h (renamed from vest/line_optimizer.h)0
-rw-r--r--dpmert/lo_test.cc (renamed from vest/lo_test.cc)44
-rw-r--r--dpmert/mert_geometry.cc186
-rw-r--r--dpmert/mert_geometry.h81
-rw-r--r--dpmert/mr_dpmert_generate_mapper_input.cc (renamed from vest/mr_vest_generate_mapper_input.cc)0
-rw-r--r--dpmert/mr_dpmert_map.cc (renamed from vest/mr_vest_map.cc)10
-rw-r--r--dpmert/mr_dpmert_reduce.cc (renamed from vest/mr_vest_reduce.cc)0
-rwxr-xr-xdpmert/parallelize.pl (renamed from vest/parallelize.pl)0
-rw-r--r--dpmert/sentclient.c (renamed from vest/sentclient.c)0
-rw-r--r--dpmert/sentserver.c (renamed from vest/sentserver.c)0
-rw-r--r--dpmert/sentserver.h (renamed from vest/sentserver.h)0
-rwxr-xr-xdpmert/tac.pl (renamed from vest/tac.pl)0
-rw-r--r--dpmert/test_aer/README (renamed from vest/test_aer/README)0
-rw-r--r--dpmert/test_aer/cdec.ini (renamed from vest/test_aer/cdec.ini)0
-rw-r--r--dpmert/test_aer/corpus.src (renamed from vest/test_aer/corpus.src)0
-rw-r--r--dpmert/test_aer/grammar (renamed from vest/test_aer/grammar)0
-rw-r--r--dpmert/test_aer/ref.0 (renamed from vest/test_aer/ref.0)0
-rw-r--r--dpmert/test_aer/weights (renamed from vest/test_aer/weights)0
-rw-r--r--dpmert/test_data/0.json.gz (renamed from vest/test_data/0.json.gz)bin13709 -> 13709 bytes
-rw-r--r--dpmert/test_data/1.json.gz (renamed from vest/test_data/1.json.gz)bin204803 -> 204803 bytes
-rw-r--r--dpmert/test_data/c2e.txt.0 (renamed from vest/test_data/c2e.txt.0)0
-rw-r--r--dpmert/test_data/c2e.txt.1 (renamed from vest/test_data/c2e.txt.1)0
-rw-r--r--dpmert/test_data/c2e.txt.2 (renamed from vest/test_data/c2e.txt.2)0
-rw-r--r--dpmert/test_data/c2e.txt.3 (renamed from vest/test_data/c2e.txt.3)0
-rw-r--r--dpmert/test_data/re.txt.0 (renamed from vest/test_data/re.txt.0)0
-rw-r--r--dpmert/test_data/re.txt.1 (renamed from vest/test_data/re.txt.1)0
-rw-r--r--dpmert/test_data/re.txt.2 (renamed from vest/test_data/re.txt.2)0
-rw-r--r--dpmert/test_data/re.txt.3 (renamed from vest/test_data/re.txt.3)0
-rw-r--r--vest/Makefile.am35
-rw-r--r--vest/viterbi_envelope.cc177
-rw-r--r--vest/viterbi_envelope.h81
44 files changed, 344 insertions, 335 deletions
diff --git a/Makefile.am b/Makefile.am
index 59c2fc0a..c0fcb1f6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
# warning - the subdirectories in the following list should
# be kept in topologically sorted order. Also, DO NOT introduce
# cyclic dependencies between these directories!
-SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira vest pro-train extools gi/pf gi/markov_al
+SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira dpmert pro-train extools gi/pf gi/markov_al
#gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
diff --git a/configure.ac b/configure.ac
index 131a1705..cd78ee72 100644
--- a/configure.ac
+++ b/configure.ac
@@ -113,4 +113,4 @@ then
AM_CONDITIONAL([GLC], true)
fi
-AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile vest/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
+AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile dpmert/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
diff --git a/dpmert/Makefile.am b/dpmert/Makefile.am
new file mode 100644
index 00000000..2676fb50
--- /dev/null
+++ b/dpmert/Makefile.am
@@ -0,0 +1,35 @@
+bin_PROGRAMS = \
+ mr_dpmert_map \
+ mr_dpmert_reduce \
+ mr_dpmert_generate_mapper_input \
+ sentserver \
+ sentclient
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
+ lo_test
+TESTS = lo_test
+endif
+
+sentserver_SOURCES = sentserver.c
+sentserver_LDFLAGS = -all-static -pthread
+
+sentclient_SOURCES = sentclient.c
+sentclient_LDFLAGS = -all-static -pthread
+
+mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc
+mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+# nbest2hg_SOURCES = nbest2hg.cc
+# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz
+
+mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc
+mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc
+mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc
+lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/vest/README.shared-mem b/dpmert/README.shared-mem
index 7728efc0..7728efc0 100644
--- a/vest/README.shared-mem
+++ b/dpmert/README.shared-mem
diff --git a/vest/cat.pl b/dpmert/cat.pl
index 2ecba3f9..2ecba3f9 100755
--- a/vest/cat.pl
+++ b/dpmert/cat.pl
diff --git a/vest/ces.cc b/dpmert/ces.cc
index cd89aa69..a85454da 100644
--- a/vest/ces.cc
+++ b/dpmert/ces.cc
@@ -7,7 +7,7 @@
// TODO, if AER is to be optimized again, we will need this
// #include "aligner.h"
#include "lattice.h"
-#include "viterbi_envelope.h"
+#include "mert_geometry.h"
#include "error_surface.h"
#include "ns.h"
@@ -17,17 +17,17 @@ using namespace std;
const bool minimize_segments = true; // if adjacent segments have equal scores, merge them
void ComputeErrorSurface(const SegmentEvaluator& ss,
- const ViterbiEnvelope& ve,
+ const ConvexHull& ve,
ErrorSurface* env,
const EvaluationMetric* metric,
const Hypergraph& hg) {
vector<WordID> prev_trans;
- const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs();
+ const vector<shared_ptr<MERTPoint> >& ienv = ve.GetSortedSegs();
env->resize(ienv.size());
SufficientStats prev_score; // defaults to 0
int j = 0;
for (int i = 0; i < ienv.size(); ++i) {
- const Segment& seg = *ienv[i];
+ const MERTPoint& seg = *ienv[i];
vector<WordID> trans;
#if 0
if (type == AER) {
diff --git a/vest/ces.h b/dpmert/ces.h
index e021e715..e4fa2080 100644
--- a/vest/ces.h
+++ b/dpmert/ces.h
@@ -1,14 +1,14 @@
#ifndef _CES_H_
#define _CES_H_
-class ViterbiEnvelope;
+class ConvexHull;
class Hypergraph;
class SegmentEvaluator;
class ErrorSurface;
class EvaluationMetric;
void ComputeErrorSurface(const SegmentEvaluator& ss,
- const ViterbiEnvelope& ve,
+ const ConvexHull& convex_hull,
ErrorSurface* es,
const EvaluationMetric* metric,
const Hypergraph& hg);
diff --git a/vest/dist-vest.pl b/dpmert/dpmert.pl
index 1ec8c6b1..52ce0fc0 100755
--- a/vest/dist-vest.pl
+++ b/dpmert/dpmert.pl
@@ -21,9 +21,9 @@ my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
my $FAST_SCORE="$bin_dir/../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
-my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input";
-my $MAPPER = "$bin_dir/mr_vest_map";
-my $REDUCER = "$bin_dir/mr_vest_reduce";
+my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input";
+my $MAPPER = "$bin_dir/mr_dpmert_map";
+my $REDUCER = "$bin_dir/mr_dpmert_reduce";
my $parallelize = "$bin_dir/parallelize.pl";
my $libcall = "$bin_dir/libcall.pl";
my $sentserver = "$bin_dir/sentserver";
@@ -136,7 +136,7 @@ if ($metric =~ /^ter$|^aer$/i) {
my $refs_comma_sep = get_comma_sep_refs('r',$refFiles);
unless ($dir){
- $dir = "vest";
+ $dir = "dpmert";
}
unless ($dir =~ /^\//){ # convert relative path to absolute path
my $basedir = check_output("pwd");
@@ -197,14 +197,14 @@ if ($dryrun){
write_config(*STDERR);
exit 0;
} else {
- if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-vest.pl outputs
+ if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs
die "ERROR: working dir $dir already exists\n\n";
} else {
-e $dir || mkdir $dir;
mkdir "$dir/hgs";
modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
mkdir "$dir/scripts";
- my $cmdfile="$dir/rerun-vest.sh";
+ my $cmdfile="$dir/rerun-dpmert.sh";
open CMD,'>',$cmdfile;
print CMD "cd ",&getcwd,"\n";
# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted.
@@ -342,7 +342,7 @@ while (1){
my $mapoutput = $shard;
my $client_name = $shard;
$client_name =~ s/mapinput.//;
- $client_name = "vest.$client_name";
+ $client_name = "dpmert.$client_name";
$mapoutput =~ s/mapinput/mapoutput/;
push @mapoutputs, "$dir/splag.$im1/$mapoutput";
$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
diff --git a/vest/error_surface.cc b/dpmert/error_surface.cc
index 515b67f8..515b67f8 100644
--- a/vest/error_surface.cc
+++ b/dpmert/error_surface.cc
diff --git a/vest/error_surface.h b/dpmert/error_surface.h
index bb65847b..bb65847b 100644
--- a/vest/error_surface.h
+++ b/dpmert/error_surface.h
diff --git a/vest/libcall.pl b/dpmert/libcall.pl
index c7d0f128..c7d0f128 100644
--- a/vest/libcall.pl
+++ b/dpmert/libcall.pl
diff --git a/vest/line_mediator.pl b/dpmert/line_mediator.pl
index bc2bb24c..bc2bb24c 100755
--- a/vest/line_mediator.pl
+++ b/dpmert/line_mediator.pl
diff --git a/vest/line_optimizer.cc b/dpmert/line_optimizer.cc
index 49443fbe..49443fbe 100644
--- a/vest/line_optimizer.cc
+++ b/dpmert/line_optimizer.cc
diff --git a/vest/line_optimizer.h b/dpmert/line_optimizer.h
index 83819f41..83819f41 100644
--- a/vest/line_optimizer.h
+++ b/dpmert/line_optimizer.h
diff --git a/vest/lo_test.cc b/dpmert/lo_test.cc
index a67f65e1..d9b909b8 100644
--- a/vest/lo_test.cc
+++ b/dpmert/lo_test.cc
@@ -15,7 +15,7 @@
#include "filelib.h"
#include "inside_outside.h"
#include "viterbi.h"
-#include "viterbi_envelope.h"
+#include "mert_geometry.h"
#include "line_optimizer.h"
using namespace std;
@@ -43,23 +43,23 @@ TEST_F(OptTest, TestCheckNaN) {
EXPECT_EQ(true, isnan(z));
}
-TEST_F(OptTest,TestViterbiEnvelope) {
- shared_ptr<Segment> a1(new Segment(-1, 0));
- shared_ptr<Segment> b1(new Segment(1, 0));
- shared_ptr<Segment> a2(new Segment(-1, 1));
- shared_ptr<Segment> b2(new Segment(1, -1));
- vector<shared_ptr<Segment> > sa; sa.push_back(a1); sa.push_back(b1);
- vector<shared_ptr<Segment> > sb; sb.push_back(a2); sb.push_back(b2);
- ViterbiEnvelope a(sa);
+TEST_F(OptTest,TestConvexHull) {
+ shared_ptr<MERTPoint> a1(new MERTPoint(-1, 0));
+ shared_ptr<MERTPoint> b1(new MERTPoint(1, 0));
+ shared_ptr<MERTPoint> a2(new MERTPoint(-1, 1));
+ shared_ptr<MERTPoint> b2(new MERTPoint(1, -1));
+ vector<shared_ptr<MERTPoint> > sa; sa.push_back(a1); sa.push_back(b1);
+ vector<shared_ptr<MERTPoint> > sb; sb.push_back(a2); sb.push_back(b2);
+ ConvexHull a(sa);
cerr << a << endl;
- ViterbiEnvelope b(sb);
- ViterbiEnvelope c = a;
+ ConvexHull b(sb);
+ ConvexHull c = a;
c *= b;
cerr << a << " (*) " << b << " = " << c << endl;
EXPECT_EQ(3, c.size());
}
-TEST_F(OptTest,TestViterbiEnvelopeInside) {
+TEST_F(OptTest,TestConvexHullInside) {
const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";
Hypergraph hg;
istringstream instr(json);
@@ -78,10 +78,10 @@ TEST_F(OptTest,TestViterbiEnvelopeInside) {
cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl;
}
SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0);
- ViterbiEnvelopeWeightFunction wf(wts, dir);
- ViterbiEnvelope env = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+ ConvexHullWeightFunction wf(wts, dir);
+ ConvexHull env = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
cerr << env << endl;
- const vector<boost::shared_ptr<Segment> >& segs = env.GetSortedSegs();
+ const vector<boost::shared_ptr<MERTPoint> >& segs = env.GetSortedSegs();
dir *= segs[1]->x;
wts += dir;
hg.Reweight(wts);
@@ -142,7 +142,7 @@ TEST_F(OptTest, TestS1) {
TD::ConvertSentence(ref22, &refs2[1]);
TD::ConvertSentence(ref32, &refs2[2]);
TD::ConvertSentence(ref42, &refs2[3]);
- vector<ViterbiEnvelope> envs(2);
+ vector<ConvexHull> envs(2);
RandomNumberGenerator<boost::mt19937> rng;
@@ -160,9 +160,9 @@ TEST_F(OptTest, TestS1) {
cerr << "Computing Viterbi envelope using inside algorithm...\n";
cerr << "axis: " << axis << endl;
clock_t t_start=clock();
- ViterbiEnvelopeWeightFunction wf(wts, axis); // wts = starting point, axis = search direction
- envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
- envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf);
+ ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction
+ envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
+ envs[1] = Inside<ConvexHull, ConvexHullWeightFunction>(hg2, NULL, wf);
vector<ErrorSurface> es(2);
EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU");
@@ -214,9 +214,9 @@ TEST_F(OptTest,TestZeroOrigin) {
}
SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0);
- ViterbiEnvelopeWeightFunction wf(wts, axis); // wts = starting point, axis = search direction
- vector<ViterbiEnvelope> envs(1);
- envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
+ ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction
+ vector<ConvexHull> envs(1);
+ envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
vector<vector<WordID> > mr(4);
TD::ConvertSentence("untitled", &mr[0]);
diff --git a/dpmert/mert_geometry.cc b/dpmert/mert_geometry.cc
new file mode 100644
index 00000000..81b25af9
--- /dev/null
+++ b/dpmert/mert_geometry.cc
@@ -0,0 +1,186 @@
+#include "mert_geometry.h"
+
+#include <cassert>
+#include <limits>
+
+using namespace std;
+using boost::shared_ptr;
+
+ConvexHull::ConvexHull(int i) {
+ if (i == 0) {
+ // do nothing - <>
+ } else if (i == 1) {
+ points.push_back(shared_ptr<MERTPoint>(new MERTPoint(0, 0, 0, shared_ptr<MERTPoint>(), shared_ptr<MERTPoint>())));
+ assert(this->IsMultiplicativeIdentity());
+ } else {
+ cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n";
+ abort();
+ }
+}
+
+const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const {
+ const double m = direction.dot(e.feature_values_);
+ const double b = origin.dot(e.feature_values_);
+ MERTPoint* point = new MERTPoint(m, b, e);
+ return ConvexHull(1, point);
+}
+
+ostream& operator<<(ostream& os, const ConvexHull& env) {
+ os << '<';
+ const vector<shared_ptr<MERTPoint> >& points = env.GetSortedSegs();
+ for (int i = 0; i < points.size(); ++i)
+ os << (i==0 ? "" : "|") << "x=" << points[i]->x << ",b=" << points[i]->b << ",m=" << points[i]->m << ",p1=" << points[i]->p1 << ",p2=" << points[i]->p2;
+ return os << '>';
+}
+
+#define ORIGINAL_MERT_IMPLEMENTATION 1
+#ifdef ORIGINAL_MERT_IMPLEMENTATION
+
+struct SlopeCompare {
+ bool operator() (const shared_ptr<MERTPoint>& a, const shared_ptr<MERTPoint>& b) const {
+ return a->m < b->m;
+ }
+};
+
+const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) {
+ if (!other.is_sorted) other.Sort();
+ if (points.empty()) {
+ points = other.points;
+ return *this;
+ }
+ is_sorted = false;
+ int j = points.size();
+ points.resize(points.size() + other.points.size());
+ for (int i = 0; i < other.points.size(); ++i)
+ points[j++] = other.points[i];
+ assert(j == points.size());
+ return *this;
+}
+
+void ConvexHull::Sort() const {
+ sort(points.begin(), points.end(), SlopeCompare());
+ const int k = points.size();
+ int j = 0;
+ for (int i = 0; i < k; ++i) {
+ MERTPoint l = *points[i];
+ l.x = kMinusInfinity;
+ // cerr << "m=" << l.m << endl;
+ if (0 < j) {
+ if (points[j-1]->m == l.m) { // lines are parallel
+ if (l.b <= points[j-1]->b) continue;
+ --j;
+ }
+ while(0 < j) {
+ l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m);
+ if (points[j-1]->x < l.x) break;
+ --j;
+ }
+ if (0 == j) l.x = kMinusInfinity;
+ }
+ *points[j++] = l;
+ }
+ points.resize(j);
+ is_sorted = true;
+}
+
+const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) {
+ if (other.IsMultiplicativeIdentity()) { return *this; }
+ if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; }
+
+ if (!is_sorted) Sort();
+ if (!other.is_sorted) other.Sort();
+
+ if (this->IsEdgeEnvelope()) {
+// if (other.size() > 1)
+// cerr << *this << " (TIMES) " << other << endl;
+ shared_ptr<MERTPoint> edge_parent = points[0];
+ const double& edge_b = edge_parent->b;
+ const double& edge_m = edge_parent->m;
+ points.clear();
+ for (int i = 0; i < other.points.size(); ++i) {
+ const MERTPoint& p = *other.points[i];
+ const double m = p.m + edge_m;
+ const double b = p.b + edge_b;
+ const double& x = p.x; // x's don't change with *
+ points.push_back(shared_ptr<MERTPoint>(new MERTPoint(x, m, b, edge_parent, other.points[i])));
+ assert(points.back()->p1->edge);
+ }
+// if (other.size() > 1)
+// cerr << " = " << *this << endl;
+ } else {
+ vector<shared_ptr<MERTPoint> > new_points;
+ int this_i = 0;
+ int other_i = 0;
+ const int this_size = points.size();
+ const int other_size = other.points.size();
+ double cur_x = kMinusInfinity; // moves from left to right across the
+ // real numbers, stopping for all inter-
+ // sections
+ double this_next_val = (1 < this_size ? points[1]->x : kPlusInfinity);
+ double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity);
+ while (this_i < this_size && other_i < other_size) {
+ const MERTPoint& this_point = *points[this_i];
+ const MERTPoint& other_point= *other.points[other_i];
+ const double m = this_point.m + other_point.m;
+ const double b = this_point.b + other_point.b;
+
+ new_points.push_back(shared_ptr<MERTPoint>(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i])));
+ int comp = 0;
+ if (this_next_val < other_next_val) comp = -1; else
+ if (this_next_val > other_next_val) comp = 1;
+ if (0 == comp) { // the next values are equal, advance both indices
+ ++this_i;
+ ++other_i;
+ cur_x = this_next_val; // could be other_next_val (they're equal!)
+ this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity);
+ other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity);
+ } else { // advance the i with the lower x, update cur_x
+ if (-1 == comp) {
+ ++this_i;
+ cur_x = this_next_val;
+ this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity);
+ } else {
+ ++other_i;
+ cur_x = other_next_val;
+ other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity);
+ }
+ }
+ }
+ points.swap(new_points);
+ }
+ //cerr << "Multiply: result=" << (*this) << endl;
+ return *this;
+}
+
+// recursively construct translation
+void MERTPoint::ConstructTranslation(vector<WordID>* trans) const {
+ const MERTPoint* cur = this;
+ vector<vector<WordID> > ant_trans;
+ while(!cur->edge) {
+ ant_trans.resize(ant_trans.size() + 1);
+ cur->p2->ConstructTranslation(&ant_trans.back());
+ cur = cur->p1.get();
+ }
+ size_t ant_size = ant_trans.size();
+ vector<const vector<WordID>*> pants(ant_size);
+ assert(ant_size == cur->edge->tail_nodes_.size());
+ --ant_size;
+ for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];
+ cur->edge->rule_->ESubstitute(pants, trans);
+}
+
+void MERTPoint::CollectEdgesUsed(std::vector<bool>* edges_used) const {
+ if (edge) {
+ assert(edge->id_ < edges_used->size());
+ (*edges_used)[edge->id_] = true;
+ }
+ if (p1) p1->CollectEdgesUsed(edges_used);
+ if (p2) p2->CollectEdgesUsed(edges_used);
+}
+
+#else
+
+// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS
+
+#endif
+
diff --git a/dpmert/mert_geometry.h b/dpmert/mert_geometry.h
new file mode 100644
index 00000000..a8b6959e
--- /dev/null
+++ b/dpmert/mert_geometry.h
@@ -0,0 +1,81 @@
+#ifndef _MERT_GEOMETRY_H_
+#define _MERT_GEOMETRY_H_
+
+#include <vector>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+
+#include "hg.h"
+#include "sparse_vector.h"
+
+static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
+static const double kPlusInfinity = std::numeric_limits<double>::infinity();
+
+struct MERTPoint {
+ MERTPoint() : x(), m(), b(), edge() {}
+ MERTPoint(double _m, double _b) :
+ x(kMinusInfinity), m(_m), b(_b), edge() {}
+ MERTPoint(double _x, double _m, double _b, const boost::shared_ptr<MERTPoint>& p1_, const boost::shared_ptr<MERTPoint>& p2_) :
+ x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {}
+ MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) :
+ x(kMinusInfinity), m(_m), b(_b), edge(&edge) {}
+
+ double x; // x intersection with previous segment in env, or -inf if none
+ double m; // this line's slope
+ double b; // intercept with y-axis
+
+ // we keep a pointer to the "parents" of this segment so we can reconstruct
+ // the Viterbi translation corresponding to this segment
+ boost::shared_ptr<MERTPoint> p1;
+ boost::shared_ptr<MERTPoint> p2;
+
+ // only MERTPoints created from an edge using the ConvexHullWeightFunction
+ // have rules
+ // TRulePtr rule;
+ const Hypergraph::Edge* edge;
+
+ // recursively recover the Viterbi translation that will result from setting
+ // the weights to origin + axis * x, where x is any value from this->x up
+ // until the next largest x in the containing ConvexHull
+ void ConstructTranslation(std::vector<WordID>* trans) const;
+ void CollectEdgesUsed(std::vector<bool>* edges_used) const;
+};
+
+// this is the semiring value type,
+// it defines constructors for 0, 1, and the operations + and *
+struct ConvexHull {
+ // create semiring zero
+ ConvexHull() : is_sorted(true) {} // zero
+ // for debugging:
+ ConvexHull(const std::vector<boost::shared_ptr<MERTPoint> >& s) : points(s) { Sort(); }
+ // create semiring 1 or 0
+ explicit ConvexHull(int i);
+ ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr<MERTPoint>(point)) {}
+ const ConvexHull& operator+=(const ConvexHull& other);
+ const ConvexHull& operator*=(const ConvexHull& other);
+ bool IsMultiplicativeIdentity() const {
+ return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); }
+ const std::vector<boost::shared_ptr<MERTPoint> >& GetSortedSegs() const {
+ if (!is_sorted) Sort();
+ return points;
+ }
+ size_t size() const { return points.size(); }
+
+ private:
+ bool IsEdgeEnvelope() const {
+ return points.size() == 1 && points[0]->edge; }
+ void Sort() const;
+ mutable bool is_sorted;
+ mutable std::vector<boost::shared_ptr<MERTPoint> > points;
+};
+std::ostream& operator<<(std::ostream& os, const ConvexHull& env);
+
+struct ConvexHullWeightFunction {
+ ConvexHullWeightFunction(const SparseVector<double>& ori,
+ const SparseVector<double>& dir) : origin(ori), direction(dir) {}
+ const ConvexHull operator()(const Hypergraph::Edge& e) const;
+ const SparseVector<double> origin;
+ const SparseVector<double> direction;
+};
+
+#endif
diff --git a/vest/mr_vest_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc
index 59d4f24f..59d4f24f 100644
--- a/vest/mr_vest_generate_mapper_input.cc
+++ b/dpmert/mr_dpmert_generate_mapper_input.cc
diff --git a/vest/mr_vest_map.cc b/dpmert/mr_dpmert_map.cc
index 7d9625bc..f3304f0f 100644
--- a/vest/mr_vest_map.cc
+++ b/dpmert/mr_dpmert_map.cc
@@ -12,7 +12,7 @@
#include "filelib.h"
#include "stringlib.h"
#include "sparse_vector.h"
-#include "viterbi_envelope.h"
+#include "mert_geometry.h"
#include "inside_outside.h"
#include "error_surface.h"
#include "b64tools.h"
@@ -95,11 +95,11 @@ int main(int argc, char** argv) {
ReadFile rf(file);
HypergraphIO::ReadFromJSON(rf.stream(), &hg);
}
- ViterbiEnvelopeWeightFunction wf(origin, direction);
- ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf);
- ErrorSurface es;
+ const ConvexHullWeightFunction wf(origin, direction);
+ const ConvexHull hull = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf);
- ComputeErrorSurface(*ds[sent_id], ve, &es, metric, hg);
+ ErrorSurface es;
+ ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg);
//cerr << "Viterbi envelope has " << ve.size() << " segments\n";
// cerr << "Error surface has " << es.size() << " segments\n";
string val;
diff --git a/vest/mr_vest_reduce.cc b/dpmert/mr_dpmert_reduce.cc
index dda61f88..dda61f88 100644
--- a/vest/mr_vest_reduce.cc
+++ b/dpmert/mr_dpmert_reduce.cc
diff --git a/vest/parallelize.pl b/dpmert/parallelize.pl
index 7d0365cc..7d0365cc 100755
--- a/vest/parallelize.pl
+++ b/dpmert/parallelize.pl
diff --git a/vest/sentclient.c b/dpmert/sentclient.c
index 91d994ab..91d994ab 100644
--- a/vest/sentclient.c
+++ b/dpmert/sentclient.c
diff --git a/vest/sentserver.c b/dpmert/sentserver.c
index c20b4fa6..c20b4fa6 100644
--- a/vest/sentserver.c
+++ b/dpmert/sentserver.c
diff --git a/vest/sentserver.h b/dpmert/sentserver.h
index cd17a546..cd17a546 100644
--- a/vest/sentserver.h
+++ b/dpmert/sentserver.h
diff --git a/vest/tac.pl b/dpmert/tac.pl
index 9fb525c1..9fb525c1 100755
--- a/vest/tac.pl
+++ b/dpmert/tac.pl
diff --git a/vest/test_aer/README b/dpmert/test_aer/README
index 819b2e32..819b2e32 100644
--- a/vest/test_aer/README
+++ b/dpmert/test_aer/README
diff --git a/vest/test_aer/cdec.ini b/dpmert/test_aer/cdec.ini
index 08187848..08187848 100644
--- a/vest/test_aer/cdec.ini
+++ b/dpmert/test_aer/cdec.ini
diff --git a/vest/test_aer/corpus.src b/dpmert/test_aer/corpus.src
index 31b23971..31b23971 100644
--- a/vest/test_aer/corpus.src
+++ b/dpmert/test_aer/corpus.src
diff --git a/vest/test_aer/grammar b/dpmert/test_aer/grammar
index 9d857824..9d857824 100644
--- a/vest/test_aer/grammar
+++ b/dpmert/test_aer/grammar
diff --git a/vest/test_aer/ref.0 b/dpmert/test_aer/ref.0
index 734a9c5b..734a9c5b 100644
--- a/vest/test_aer/ref.0
+++ b/dpmert/test_aer/ref.0
diff --git a/vest/test_aer/weights b/dpmert/test_aer/weights
index afc9282e..afc9282e 100644
--- a/vest/test_aer/weights
+++ b/dpmert/test_aer/weights
diff --git a/vest/test_data/0.json.gz b/dpmert/test_data/0.json.gz
index 30f8dd77..30f8dd77 100644
--- a/vest/test_data/0.json.gz
+++ b/dpmert/test_data/0.json.gz
Binary files differ
diff --git a/vest/test_data/1.json.gz b/dpmert/test_data/1.json.gz
index c82cc179..c82cc179 100644
--- a/vest/test_data/1.json.gz
+++ b/dpmert/test_data/1.json.gz
Binary files differ
diff --git a/vest/test_data/c2e.txt.0 b/dpmert/test_data/c2e.txt.0
index 12c4abe9..12c4abe9 100644
--- a/vest/test_data/c2e.txt.0
+++ b/dpmert/test_data/c2e.txt.0
diff --git a/vest/test_data/c2e.txt.1 b/dpmert/test_data/c2e.txt.1
index 4ac12df1..4ac12df1 100644
--- a/vest/test_data/c2e.txt.1
+++ b/dpmert/test_data/c2e.txt.1
diff --git a/vest/test_data/c2e.txt.2 b/dpmert/test_data/c2e.txt.2
index 2f67b72f..2f67b72f 100644
--- a/vest/test_data/c2e.txt.2
+++ b/dpmert/test_data/c2e.txt.2
diff --git a/vest/test_data/c2e.txt.3 b/dpmert/test_data/c2e.txt.3
index 5483cef6..5483cef6 100644
--- a/vest/test_data/c2e.txt.3
+++ b/dpmert/test_data/c2e.txt.3
diff --git a/vest/test_data/re.txt.0 b/dpmert/test_data/re.txt.0
index 86eff087..86eff087 100644
--- a/vest/test_data/re.txt.0
+++ b/dpmert/test_data/re.txt.0
diff --git a/vest/test_data/re.txt.1 b/dpmert/test_data/re.txt.1
index 2140f198..2140f198 100644
--- a/vest/test_data/re.txt.1
+++ b/dpmert/test_data/re.txt.1
diff --git a/vest/test_data/re.txt.2 b/dpmert/test_data/re.txt.2
index 94e46286..94e46286 100644
--- a/vest/test_data/re.txt.2
+++ b/dpmert/test_data/re.txt.2
diff --git a/vest/test_data/re.txt.3 b/dpmert/test_data/re.txt.3
index f87c3308..f87c3308 100644
--- a/vest/test_data/re.txt.3
+++ b/dpmert/test_data/re.txt.3
diff --git a/vest/Makefile.am b/vest/Makefile.am
deleted file mode 100644
index 05fa5639..00000000
--- a/vest/Makefile.am
+++ /dev/null
@@ -1,35 +0,0 @@
-bin_PROGRAMS = \
- mr_vest_map \
- mr_vest_reduce \
- mr_vest_generate_mapper_input \
- sentserver \
- sentclient
-
-if HAVE_GTEST
-noinst_PROGRAMS = \
- lo_test
-TESTS = lo_test
-endif
-
-sentserver_SOURCES = sentserver.c
-sentserver_LDFLAGS = -all-static -pthread
-
-sentclient_SOURCES = sentclient.c
-sentclient_LDFLAGS = -all-static -pthread
-
-mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc
-mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-# nbest2hg_SOURCES = nbest2hg.cc
-# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz
-
-mr_vest_map_SOURCES = viterbi_envelope.cc ces.cc error_surface.cc mr_vest_map.cc line_optimizer.cc
-mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_vest_reduce_SOURCES = error_surface.cc ces.cc mr_vest_reduce.cc line_optimizer.cc viterbi_envelope.cc
-mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-lo_test_SOURCES = lo_test.cc ces.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc
-lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/vest/viterbi_envelope.cc b/vest/viterbi_envelope.cc
deleted file mode 100644
index 9fcf75a0..00000000
--- a/vest/viterbi_envelope.cc
+++ /dev/null
@@ -1,177 +0,0 @@
-#include "viterbi_envelope.h"
-
-#include <cassert>
-#include <limits>
-
-using namespace std;
-using boost::shared_ptr;
-
-ostream& operator<<(ostream& os, const ViterbiEnvelope& env) {
- os << '<';
- const vector<shared_ptr<Segment> >& segs = env.GetSortedSegs();
- for (int i = 0; i < segs.size(); ++i)
- os << (i==0 ? "" : "|") << "x=" << segs[i]->x << ",b=" << segs[i]->b << ",m=" << segs[i]->m << ",p1=" << segs[i]->p1 << ",p2=" << segs[i]->p2;
- return os << '>';
-}
-
-ViterbiEnvelope::ViterbiEnvelope(int i) {
- if (i == 0) {
- // do nothing - <>
- } else if (i == 1) {
- segs.push_back(shared_ptr<Segment>(new Segment(0, 0, 0, shared_ptr<Segment>(), shared_ptr<Segment>())));
- assert(this->IsMultiplicativeIdentity());
- } else {
- cerr << "Only can create ViterbiEnvelope semiring 0 and 1 with this constructor!\n";
- abort();
- }
-}
-
-struct SlopeCompare {
- bool operator() (const shared_ptr<Segment>& a, const shared_ptr<Segment>& b) const {
- return a->m < b->m;
- }
-};
-
-const ViterbiEnvelope& ViterbiEnvelope::operator+=(const ViterbiEnvelope& other) {
- if (!other.is_sorted) other.Sort();
- if (segs.empty()) {
- segs = other.segs;
- return *this;
- }
- is_sorted = false;
- int j = segs.size();
- segs.resize(segs.size() + other.segs.size());
- for (int i = 0; i < other.segs.size(); ++i)
- segs[j++] = other.segs[i];
- assert(j == segs.size());
- return *this;
-}
-
-void ViterbiEnvelope::Sort() const {
- sort(segs.begin(), segs.end(), SlopeCompare());
- const int k = segs.size();
- int j = 0;
- for (int i = 0; i < k; ++i) {
- Segment l = *segs[i];
- l.x = kMinusInfinity;
- // cerr << "m=" << l.m << endl;
- if (0 < j) {
- if (segs[j-1]->m == l.m) { // lines are parallel
- if (l.b <= segs[j-1]->b) continue;
- --j;
- }
- while(0 < j) {
- l.x = (l.b - segs[j-1]->b) / (segs[j-1]->m - l.m);
- if (segs[j-1]->x < l.x) break;
- --j;
- }
- if (0 == j) l.x = kMinusInfinity;
- }
- *segs[j++] = l;
- }
- segs.resize(j);
- is_sorted = true;
-}
-
-const ViterbiEnvelope& ViterbiEnvelope::operator*=(const ViterbiEnvelope& other) {
- if (other.IsMultiplicativeIdentity()) { return *this; }
- if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; }
-
- if (!is_sorted) Sort();
- if (!other.is_sorted) other.Sort();
-
- if (this->IsEdgeEnvelope()) {
-// if (other.size() > 1)
-// cerr << *this << " (TIMES) " << other << endl;
- shared_ptr<Segment> edge_parent = segs[0];
- const double& edge_b = edge_parent->b;
- const double& edge_m = edge_parent->m;
- segs.clear();
- for (int i = 0; i < other.segs.size(); ++i) {
- const Segment& seg = *other.segs[i];
- const double m = seg.m + edge_m;
- const double b = seg.b + edge_b;
- const double& x = seg.x; // x's don't change with *
- segs.push_back(shared_ptr<Segment>(new Segment(x, m, b, edge_parent, other.segs[i])));
- assert(segs.back()->p1->edge);
- }
-// if (other.size() > 1)
-// cerr << " = " << *this << endl;
- } else {
- vector<shared_ptr<Segment> > new_segs;
- int this_i = 0;
- int other_i = 0;
- const int this_size = segs.size();
- const int other_size = other.segs.size();
- double cur_x = kMinusInfinity; // moves from left to right across the
- // real numbers, stopping for all inter-
- // sections
- double this_next_val = (1 < this_size ? segs[1]->x : kPlusInfinity);
- double other_next_val = (1 < other_size ? other.segs[1]->x : kPlusInfinity);
- while (this_i < this_size && other_i < other_size) {
- const Segment& this_seg = *segs[this_i];
- const Segment& other_seg= *other.segs[other_i];
- const double m = this_seg.m + other_seg.m;
- const double b = this_seg.b + other_seg.b;
-
- new_segs.push_back(shared_ptr<Segment>(new Segment(cur_x, m, b, segs[this_i], other.segs[other_i])));
- int comp = 0;
- if (this_next_val < other_next_val) comp = -1; else
- if (this_next_val > other_next_val) comp = 1;
- if (0 == comp) { // the next values are equal, advance both indices
- ++this_i;
- ++other_i;
- cur_x = this_next_val; // could be other_next_val (they're equal!)
- this_next_val = (this_i+1 < this_size ? segs[this_i+1]->x : kPlusInfinity);
- other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
- } else { // advance the i with the lower x, update cur_x
- if (-1 == comp) {
- ++this_i;
- cur_x = this_next_val;
- this_next_val = (this_i+1 < this_size ? segs[this_i+1]->x : kPlusInfinity);
- } else {
- ++other_i;
- cur_x = other_next_val;
- other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity);
- }
- }
- }
- segs.swap(new_segs);
- }
- //cerr << "Multiply: result=" << (*this) << endl;
- return *this;
-}
-
-// recursively construct translation
-void Segment::ConstructTranslation(vector<WordID>* trans) const {
- const Segment* cur = this;
- vector<vector<WordID> > ant_trans;
- while(!cur->edge) {
- ant_trans.resize(ant_trans.size() + 1);
- cur->p2->ConstructTranslation(&ant_trans.back());
- cur = cur->p1.get();
- }
- size_t ant_size = ant_trans.size();
- vector<const vector<WordID>*> pants(ant_size);
- assert(ant_size == cur->edge->tail_nodes_.size());
- --ant_size;
- for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i];
- cur->edge->rule_->ESubstitute(pants, trans);
-}
-
-void Segment::CollectEdgesUsed(std::vector<bool>* edges_used) const {
- if (edge) {
- assert(edge->id_ < edges_used->size());
- (*edges_used)[edge->id_] = true;
- }
- if (p1) p1->CollectEdgesUsed(edges_used);
- if (p2) p2->CollectEdgesUsed(edges_used);
-}
-
-ViterbiEnvelope ViterbiEnvelopeWeightFunction::operator()(const Hypergraph::Edge& e) const {
- const double m = direction.dot(e.feature_values_);
- const double b = origin.dot(e.feature_values_);
- Segment* seg = new Segment(m, b, e);
- return ViterbiEnvelope(1, seg);
-}
-
diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h
deleted file mode 100644
index 60ad82d8..00000000
--- a/vest/viterbi_envelope.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef _VITERBI_ENVELOPE_H_
-#define _VITERBI_ENVELOPE_H_
-
-#include <vector>
-#include <iostream>
-#include <boost/shared_ptr.hpp>
-
-#include "hg.h"
-#include "sparse_vector.h"
-
-static const double kMinusInfinity = -std::numeric_limits<double>::infinity();
-static const double kPlusInfinity = std::numeric_limits<double>::infinity();
-
-struct Segment {
- Segment() : x(), m(), b(), edge() {}
- Segment(double _m, double _b) :
- x(kMinusInfinity), m(_m), b(_b), edge() {}
- Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) :
- x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {}
- Segment(double _m, double _b, const Hypergraph::Edge& edge) :
- x(kMinusInfinity), m(_m), b(_b), edge(&edge) {}
-
- double x; // x intersection with previous segment in env, or -inf if none
- double m; // this line's slope
- double b; // intercept with y-axis
-
- // we keep a pointer to the "parents" of this segment so we can reconstruct
- // the Viterbi translation corresponding to this segment
- boost::shared_ptr<Segment> p1;
- boost::shared_ptr<Segment> p2;
-
- // only Segments created from an edge using the ViterbiEnvelopeWeightFunction
- // have rules
- // TRulePtr rule;
- const Hypergraph::Edge* edge;
-
- // recursively recover the Viterbi translation that will result from setting
- // the weights to origin + axis * x, where x is any value from this->x up
- // until the next largest x in the containing ViterbiEnvelope
- void ConstructTranslation(std::vector<WordID>* trans) const;
- void CollectEdgesUsed(std::vector<bool>* edges_used) const;
-};
-
-// this is the semiring value type,
-// it defines constructors for 0, 1, and the operations + and *
-struct ViterbiEnvelope {
- // create semiring zero
- ViterbiEnvelope() : is_sorted(true) {} // zero
- // for debugging:
- ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); }
- // create semiring 1 or 0
- explicit ViterbiEnvelope(int i);
- ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {}
- const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other);
- const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other);
- bool IsMultiplicativeIdentity() const {
- return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge) && (!segs[0]->p1) && (!segs[0]->p2); }
- const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const {
- if (!is_sorted) Sort();
- return segs;
- }
- size_t size() const { return segs.size(); }
-
- private:
- bool IsEdgeEnvelope() const {
- return segs.size() == 1 && segs[0]->edge; }
- void Sort() const;
- mutable bool is_sorted;
- mutable std::vector<boost::shared_ptr<Segment> > segs;
-};
-std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env);
-
-struct ViterbiEnvelopeWeightFunction {
- ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori,
- const SparseVector<double>& dir) : origin(ori), direction(dir) {}
- ViterbiEnvelope operator()(const Hypergraph::Edge& e) const;
- const SparseVector<double> origin;
- const SparseVector<double> direction;
-};
-
-#endif