diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-27 14:49:08 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-01-27 14:49:08 -0500 |
commit | 89b662b51373f0f466d62a65d3f0a164d1d31b1c (patch) | |
tree | efff9d60e5c128c054f848fa802e9f077cad54b1 | |
parent | 3d17bf9ae1ba67cd091794839d4d5f4c393a0e2c (diff) |
rename vest to dpmert (dynamic programming mert), rename variables and types to correspond to standard geometric concepts
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | dpmert/Makefile.am | 35 | ||||
-rw-r--r-- | dpmert/README.shared-mem (renamed from vest/README.shared-mem) | 0 | ||||
-rwxr-xr-x | dpmert/cat.pl (renamed from vest/cat.pl) | 0 | ||||
-rw-r--r-- | dpmert/ces.cc (renamed from vest/ces.cc) | 8 | ||||
-rw-r--r-- | dpmert/ces.h (renamed from vest/ces.h) | 4 | ||||
-rwxr-xr-x | dpmert/dpmert.pl (renamed from vest/dist-vest.pl) | 14 | ||||
-rw-r--r-- | dpmert/error_surface.cc (renamed from vest/error_surface.cc) | 0 | ||||
-rw-r--r-- | dpmert/error_surface.h (renamed from vest/error_surface.h) | 0 | ||||
-rw-r--r-- | dpmert/libcall.pl (renamed from vest/libcall.pl) | 0 | ||||
-rwxr-xr-x | dpmert/line_mediator.pl (renamed from vest/line_mediator.pl) | 0 | ||||
-rw-r--r-- | dpmert/line_optimizer.cc (renamed from vest/line_optimizer.cc) | 0 | ||||
-rw-r--r-- | dpmert/line_optimizer.h (renamed from vest/line_optimizer.h) | 0 | ||||
-rw-r--r-- | dpmert/lo_test.cc (renamed from vest/lo_test.cc) | 44 | ||||
-rw-r--r-- | dpmert/mert_geometry.cc | 186 | ||||
-rw-r--r-- | dpmert/mert_geometry.h | 81 | ||||
-rw-r--r-- | dpmert/mr_dpmert_generate_mapper_input.cc (renamed from vest/mr_vest_generate_mapper_input.cc) | 0 | ||||
-rw-r--r-- | dpmert/mr_dpmert_map.cc (renamed from vest/mr_vest_map.cc) | 10 | ||||
-rw-r--r-- | dpmert/mr_dpmert_reduce.cc (renamed from vest/mr_vest_reduce.cc) | 0 | ||||
-rwxr-xr-x | dpmert/parallelize.pl (renamed from vest/parallelize.pl) | 0 | ||||
-rw-r--r-- | dpmert/sentclient.c (renamed from vest/sentclient.c) | 0 | ||||
-rw-r--r-- | dpmert/sentserver.c (renamed from vest/sentserver.c) | 0 | ||||
-rw-r--r-- | dpmert/sentserver.h (renamed from vest/sentserver.h) | 0 | ||||
-rwxr-xr-x | dpmert/tac.pl (renamed from vest/tac.pl) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/README (renamed from vest/test_aer/README) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/cdec.ini (renamed from vest/test_aer/cdec.ini) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/corpus.src (renamed from vest/test_aer/corpus.src) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/grammar (renamed from vest/test_aer/grammar) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/ref.0 (renamed from vest/test_aer/ref.0) | 0 | ||||
-rw-r--r-- | dpmert/test_aer/weights (renamed from vest/test_aer/weights) | 0 | ||||
-rw-r--r-- | dpmert/test_data/0.json.gz (renamed from vest/test_data/0.json.gz) | bin | 13709 -> 13709 bytes | |||
-rw-r--r-- | dpmert/test_data/1.json.gz (renamed from vest/test_data/1.json.gz) | bin | 204803 -> 204803 bytes | |||
-rw-r--r-- | dpmert/test_data/c2e.txt.0 (renamed from vest/test_data/c2e.txt.0) | 0 | ||||
-rw-r--r-- | dpmert/test_data/c2e.txt.1 (renamed from vest/test_data/c2e.txt.1) | 0 | ||||
-rw-r--r-- | dpmert/test_data/c2e.txt.2 (renamed from vest/test_data/c2e.txt.2) | 0 | ||||
-rw-r--r-- | dpmert/test_data/c2e.txt.3 (renamed from vest/test_data/c2e.txt.3) | 0 | ||||
-rw-r--r-- | dpmert/test_data/re.txt.0 (renamed from vest/test_data/re.txt.0) | 0 | ||||
-rw-r--r-- | dpmert/test_data/re.txt.1 (renamed from vest/test_data/re.txt.1) | 0 | ||||
-rw-r--r-- | dpmert/test_data/re.txt.2 (renamed from vest/test_data/re.txt.2) | 0 | ||||
-rw-r--r-- | dpmert/test_data/re.txt.3 (renamed from vest/test_data/re.txt.3) | 0 | ||||
-rw-r--r-- | vest/Makefile.am | 35 | ||||
-rw-r--r-- | vest/viterbi_envelope.cc | 177 | ||||
-rw-r--r-- | vest/viterbi_envelope.h | 81 |
44 files changed, 344 insertions, 335 deletions
diff --git a/Makefile.am b/Makefile.am index 59c2fc0a..c0fcb1f6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,7 +1,7 @@ # warning - the subdirectories in the following list should # be kept in topologically sorted order. Also, DO NOT introduce # cyclic dependencies between these directories! -SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira vest pro-train extools gi/pf gi/markov_al +SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training mira dpmert pro-train extools gi/pf gi/markov_al #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava diff --git a/configure.ac b/configure.ac index 131a1705..cd78ee72 100644 --- a/configure.ac +++ b/configure.ac @@ -113,4 +113,4 @@ then AM_CONDITIONAL([GLC], true) fi -AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile vest/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) +AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile dpmert/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) diff --git a/dpmert/Makefile.am b/dpmert/Makefile.am new file mode 100644 index 00000000..2676fb50 --- /dev/null +++ b/dpmert/Makefile.am @@ -0,0 +1,35 @@ +bin_PROGRAMS = \ + mr_dpmert_map \ + mr_dpmert_reduce \ + mr_dpmert_generate_mapper_input \ + sentserver \ + sentclient + +if HAVE_GTEST +noinst_PROGRAMS = \ + lo_test +TESTS = lo_test +endif + +sentserver_SOURCES = sentserver.c +sentserver_LDFLAGS = -all-static -pthread + +sentclient_SOURCES = sentclient.c +sentclient_LDFLAGS = -all-static -pthread + +mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc +mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +# nbest2hg_SOURCES = nbest2hg.cc +# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz + +mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc +mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc +mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc +lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/vest/README.shared-mem b/dpmert/README.shared-mem index 7728efc0..7728efc0 100644 --- a/vest/README.shared-mem +++ b/dpmert/README.shared-mem diff --git a/vest/cat.pl b/dpmert/cat.pl index 2ecba3f9..2ecba3f9 100755 --- a/vest/cat.pl +++ b/dpmert/cat.pl diff --git a/vest/ces.cc b/dpmert/ces.cc index cd89aa69..a85454da 100644 --- a/vest/ces.cc +++ b/dpmert/ces.cc @@ -7,7 +7,7 @@ // TODO, if AER is to be optimized again, we will need this // #include "aligner.h" #include "lattice.h" -#include "viterbi_envelope.h" +#include "mert_geometry.h" #include "error_surface.h" #include "ns.h" @@ -17,17 +17,17 @@ using namespace std; const bool minimize_segments = true; // if adjacent segments have equal scores, merge them void ComputeErrorSurface(const SegmentEvaluator& ss, - const ViterbiEnvelope& ve, + const ConvexHull& ve, ErrorSurface* env, const EvaluationMetric* metric, const Hypergraph& hg) { vector<WordID> prev_trans; - const vector<shared_ptr<Segment> >& ienv = ve.GetSortedSegs(); + const vector<shared_ptr<MERTPoint> >& ienv = ve.GetSortedSegs(); env->resize(ienv.size()); SufficientStats prev_score; // defaults to 0 int j = 0; for (int i = 0; i < ienv.size(); ++i) { - const Segment& seg = *ienv[i]; + const MERTPoint& seg = *ienv[i]; vector<WordID> trans; #if 0 if (type == AER) { diff --git a/vest/ces.h b/dpmert/ces.h index e021e715..e4fa2080 100644 --- a/vest/ces.h +++ b/dpmert/ces.h @@ -1,14 +1,14 @@ #ifndef _CES_H_ #define _CES_H_ -class ViterbiEnvelope; +class ConvexHull; class Hypergraph; class SegmentEvaluator; class ErrorSurface; class EvaluationMetric; void ComputeErrorSurface(const SegmentEvaluator& ss, - const ViterbiEnvelope& ve, + const ConvexHull& convex_hull, ErrorSurface* es, const EvaluationMetric* metric, const Hypergraph& hg); diff --git a/vest/dist-vest.pl b/dpmert/dpmert.pl index 1ec8c6b1..52ce0fc0 100755 --- a/vest/dist-vest.pl +++ b/dpmert/dpmert.pl @@ -21,9 +21,9 @@ my $bin_dir = $SCRIPT_DIR; die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; my $FAST_SCORE="$bin_dir/../mteval/fast_score"; die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input"; -my $MAPPER = "$bin_dir/mr_vest_map"; -my $REDUCER = "$bin_dir/mr_vest_reduce"; +my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input"; +my $MAPPER = "$bin_dir/mr_dpmert_map"; +my $REDUCER = "$bin_dir/mr_dpmert_reduce"; my $parallelize = "$bin_dir/parallelize.pl"; my $libcall = "$bin_dir/libcall.pl"; my $sentserver = "$bin_dir/sentserver"; @@ -136,7 +136,7 @@ if ($metric =~ /^ter$|^aer$/i) { my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); unless ($dir){ - $dir = "vest"; + $dir = "dpmert"; } unless ($dir =~ /^\//){ # convert relative path to absolute path my $basedir = check_output("pwd"); @@ -197,14 +197,14 @@ if ($dryrun){ write_config(*STDERR); exit 0; } else { - if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-vest.pl outputs + if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs die "ERROR: working dir $dir already exists\n\n"; } else { -e $dir || mkdir $dir; mkdir "$dir/hgs"; modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; mkdir "$dir/scripts"; - my $cmdfile="$dir/rerun-vest.sh"; + my $cmdfile="$dir/rerun-dpmert.sh"; open CMD,'>',$cmdfile; print CMD "cd ",&getcwd,"\n"; # print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. @@ -342,7 +342,7 @@ while (1){ my $mapoutput = $shard; my $client_name = $shard; $client_name =~ s/mapinput.//; - $client_name = "vest.$client_name"; + $client_name = "dpmert.$client_name"; $mapoutput =~ s/mapinput/mapoutput/; push @mapoutputs, "$dir/splag.$im1/$mapoutput"; $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; diff --git a/vest/error_surface.cc b/dpmert/error_surface.cc index 515b67f8..515b67f8 100644 --- a/vest/error_surface.cc +++ b/dpmert/error_surface.cc diff --git a/vest/error_surface.h b/dpmert/error_surface.h index bb65847b..bb65847b 100644 --- a/vest/error_surface.h +++ b/dpmert/error_surface.h diff --git a/vest/libcall.pl b/dpmert/libcall.pl index c7d0f128..c7d0f128 100644 --- a/vest/libcall.pl +++ b/dpmert/libcall.pl diff --git a/vest/line_mediator.pl b/dpmert/line_mediator.pl index bc2bb24c..bc2bb24c 100755 --- a/vest/line_mediator.pl +++ b/dpmert/line_mediator.pl diff --git a/vest/line_optimizer.cc b/dpmert/line_optimizer.cc index 49443fbe..49443fbe 100644 --- a/vest/line_optimizer.cc +++ b/dpmert/line_optimizer.cc diff --git a/vest/line_optimizer.h b/dpmert/line_optimizer.h index 83819f41..83819f41 100644 --- a/vest/line_optimizer.h +++ b/dpmert/line_optimizer.h diff --git a/vest/lo_test.cc b/dpmert/lo_test.cc index a67f65e1..d9b909b8 100644 --- a/vest/lo_test.cc +++ b/dpmert/lo_test.cc @@ -15,7 +15,7 @@ #include "filelib.h" #include "inside_outside.h" #include "viterbi.h" -#include "viterbi_envelope.h" +#include "mert_geometry.h" #include "line_optimizer.h" using namespace std; @@ -43,23 +43,23 @@ TEST_F(OptTest, TestCheckNaN) { EXPECT_EQ(true, isnan(z)); } -TEST_F(OptTest,TestViterbiEnvelope) { - shared_ptr<Segment> a1(new Segment(-1, 0)); - shared_ptr<Segment> b1(new Segment(1, 0)); - shared_ptr<Segment> a2(new Segment(-1, 1)); - shared_ptr<Segment> b2(new Segment(1, -1)); - vector<shared_ptr<Segment> > sa; sa.push_back(a1); sa.push_back(b1); - vector<shared_ptr<Segment> > sb; sb.push_back(a2); sb.push_back(b2); - ViterbiEnvelope a(sa); +TEST_F(OptTest,TestConvexHull) { + shared_ptr<MERTPoint> a1(new MERTPoint(-1, 0)); + shared_ptr<MERTPoint> b1(new MERTPoint(1, 0)); + shared_ptr<MERTPoint> a2(new MERTPoint(-1, 1)); + shared_ptr<MERTPoint> b2(new MERTPoint(1, -1)); + vector<shared_ptr<MERTPoint> > sa; sa.push_back(a1); sa.push_back(b1); + vector<shared_ptr<MERTPoint> > sb; sb.push_back(a2); sb.push_back(b2); + ConvexHull a(sa); cerr << a << endl; - ViterbiEnvelope b(sb); - ViterbiEnvelope c = a; + ConvexHull b(sb); + ConvexHull c = a; c *= b; cerr << a << " (*) " << b << " = " << c << endl; EXPECT_EQ(3, c.size()); } -TEST_F(OptTest,TestViterbiEnvelopeInside) { +TEST_F(OptTest,TestConvexHullInside) { const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; Hypergraph hg; istringstream instr(json); @@ -78,10 +78,10 @@ TEST_F(OptTest,TestViterbiEnvelopeInside) { cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; } SparseVector<double> dir; dir.set_value(FD::Convert("f1"), 1.0); - ViterbiEnvelopeWeightFunction wf(wts, dir); - ViterbiEnvelope env = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf); + ConvexHullWeightFunction wf(wts, dir); + ConvexHull env = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); cerr << env << endl; - const vector<boost::shared_ptr<Segment> >& segs = env.GetSortedSegs(); + const vector<boost::shared_ptr<MERTPoint> >& segs = env.GetSortedSegs(); dir *= segs[1]->x; wts += dir; hg.Reweight(wts); @@ -142,7 +142,7 @@ TEST_F(OptTest, TestS1) { TD::ConvertSentence(ref22, &refs2[1]); TD::ConvertSentence(ref32, &refs2[2]); TD::ConvertSentence(ref42, &refs2[3]); - vector<ViterbiEnvelope> envs(2); + vector<ConvexHull> envs(2); RandomNumberGenerator<boost::mt19937> rng; @@ -160,9 +160,9 @@ TEST_F(OptTest, TestS1) { cerr << "Computing Viterbi envelope using inside algorithm...\n"; cerr << "axis: " << axis << endl; clock_t t_start=clock(); - ViterbiEnvelopeWeightFunction wf(wts, axis); // wts = starting point, axis = search direction - envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf); - envs[1] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg2, NULL, wf); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); + envs[1] = Inside<ConvexHull, ConvexHullWeightFunction>(hg2, NULL, wf); vector<ErrorSurface> es(2); EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); @@ -214,9 +214,9 @@ TEST_F(OptTest,TestZeroOrigin) { } SparseVector<double> axis; axis.set_value(FD::Convert("Glue"),1.0); - ViterbiEnvelopeWeightFunction wf(wts, axis); // wts = starting point, axis = search direction - vector<ViterbiEnvelope> envs(1); - envs[0] = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + vector<ConvexHull> envs(1); + envs[0] = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); vector<vector<WordID> > mr(4); TD::ConvertSentence("untitled", &mr[0]); diff --git a/dpmert/mert_geometry.cc b/dpmert/mert_geometry.cc new file mode 100644 index 00000000..81b25af9 --- /dev/null +++ b/dpmert/mert_geometry.cc @@ -0,0 +1,186 @@ +#include "mert_geometry.h" + +#include <cassert> +#include <limits> + +using namespace std; +using boost::shared_ptr; + +ConvexHull::ConvexHull(int i) { + if (i == 0) { + // do nothing - <> + } else if (i == 1) { + points.push_back(shared_ptr<MERTPoint>(new MERTPoint(0, 0, 0, shared_ptr<MERTPoint>(), shared_ptr<MERTPoint>()))); + assert(this->IsMultiplicativeIdentity()); + } else { + cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n"; + abort(); + } +} + +const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const { + const double m = direction.dot(e.feature_values_); + const double b = origin.dot(e.feature_values_); + MERTPoint* point = new MERTPoint(m, b, e); + return ConvexHull(1, point); +} + +ostream& operator<<(ostream& os, const ConvexHull& env) { + os << '<'; + const vector<shared_ptr<MERTPoint> >& points = env.GetSortedSegs(); + for (int i = 0; i < points.size(); ++i) + os << (i==0 ? "" : "|") << "x=" << points[i]->x << ",b=" << points[i]->b << ",m=" << points[i]->m << ",p1=" << points[i]->p1 << ",p2=" << points[i]->p2; + return os << '>'; +} + +#define ORIGINAL_MERT_IMPLEMENTATION 1 +#ifdef ORIGINAL_MERT_IMPLEMENTATION + +struct SlopeCompare { + bool operator() (const shared_ptr<MERTPoint>& a, const shared_ptr<MERTPoint>& b) const { + return a->m < b->m; + } +}; + +const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) { + if (!other.is_sorted) other.Sort(); + if (points.empty()) { + points = other.points; + return *this; + } + is_sorted = false; + int j = points.size(); + points.resize(points.size() + other.points.size()); + for (int i = 0; i < other.points.size(); ++i) + points[j++] = other.points[i]; + assert(j == points.size()); + return *this; +} + +void ConvexHull::Sort() const { + sort(points.begin(), points.end(), SlopeCompare()); + const int k = points.size(); + int j = 0; + for (int i = 0; i < k; ++i) { + MERTPoint l = *points[i]; + l.x = kMinusInfinity; + // cerr << "m=" << l.m << endl; + if (0 < j) { + if (points[j-1]->m == l.m) { // lines are parallel + if (l.b <= points[j-1]->b) continue; + --j; + } + while(0 < j) { + l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m); + if (points[j-1]->x < l.x) break; + --j; + } + if (0 == j) l.x = kMinusInfinity; + } + *points[j++] = l; + } + points.resize(j); + is_sorted = true; +} + +const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) { + if (other.IsMultiplicativeIdentity()) { return *this; } + if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; } + + if (!is_sorted) Sort(); + if (!other.is_sorted) other.Sort(); + + if (this->IsEdgeEnvelope()) { +// if (other.size() > 1) +// cerr << *this << " (TIMES) " << other << endl; + shared_ptr<MERTPoint> edge_parent = points[0]; + const double& edge_b = edge_parent->b; + const double& edge_m = edge_parent->m; + points.clear(); + for (int i = 0; i < other.points.size(); ++i) { + const MERTPoint& p = *other.points[i]; + const double m = p.m + edge_m; + const double b = p.b + edge_b; + const double& x = p.x; // x's don't change with * + points.push_back(shared_ptr<MERTPoint>(new MERTPoint(x, m, b, edge_parent, other.points[i]))); + assert(points.back()->p1->edge); + } +// if (other.size() > 1) +// cerr << " = " << *this << endl; + } else { + vector<shared_ptr<MERTPoint> > new_points; + int this_i = 0; + int other_i = 0; + const int this_size = points.size(); + const int other_size = other.points.size(); + double cur_x = kMinusInfinity; // moves from left to right across the + // real numbers, stopping for all inter- + // sections + double this_next_val = (1 < this_size ? points[1]->x : kPlusInfinity); + double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity); + while (this_i < this_size && other_i < other_size) { + const MERTPoint& this_point = *points[this_i]; + const MERTPoint& other_point= *other.points[other_i]; + const double m = this_point.m + other_point.m; + const double b = this_point.b + other_point.b; + + new_points.push_back(shared_ptr<MERTPoint>(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i]))); + int comp = 0; + if (this_next_val < other_next_val) comp = -1; else + if (this_next_val > other_next_val) comp = 1; + if (0 == comp) { // the next values are equal, advance both indices + ++this_i; + ++other_i; + cur_x = this_next_val; // could be other_next_val (they're equal!) + this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } else { // advance the i with the lower x, update cur_x + if (-1 == comp) { + ++this_i; + cur_x = this_next_val; + this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); + } else { + ++other_i; + cur_x = other_next_val; + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } + } + } + points.swap(new_points); + } + //cerr << "Multiply: result=" << (*this) << endl; + return *this; +} + +// recursively construct translation +void MERTPoint::ConstructTranslation(vector<WordID>* trans) const { + const MERTPoint* cur = this; + vector<vector<WordID> > ant_trans; + while(!cur->edge) { + ant_trans.resize(ant_trans.size() + 1); + cur->p2->ConstructTranslation(&ant_trans.back()); + cur = cur->p1.get(); + } + size_t ant_size = ant_trans.size(); + vector<const vector<WordID>*> pants(ant_size); + assert(ant_size == cur->edge->tail_nodes_.size()); + --ant_size; + for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i]; + cur->edge->rule_->ESubstitute(pants, trans); +} + +void MERTPoint::CollectEdgesUsed(std::vector<bool>* edges_used) const { + if (edge) { + assert(edge->id_ < edges_used->size()); + (*edges_used)[edge->id_] = true; + } + if (p1) p1->CollectEdgesUsed(edges_used); + if (p2) p2->CollectEdgesUsed(edges_used); +} + +#else + +// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS + +#endif + diff --git a/dpmert/mert_geometry.h b/dpmert/mert_geometry.h new file mode 100644 index 00000000..a8b6959e --- /dev/null +++ b/dpmert/mert_geometry.h @@ -0,0 +1,81 @@ +#ifndef _MERT_GEOMETRY_H_ +#define _MERT_GEOMETRY_H_ + +#include <vector> +#include <iostream> +#include <boost/shared_ptr.hpp> + +#include "hg.h" +#include "sparse_vector.h" + +static const double kMinusInfinity = -std::numeric_limits<double>::infinity(); +static const double kPlusInfinity = std::numeric_limits<double>::infinity(); + +struct MERTPoint { + MERTPoint() : x(), m(), b(), edge() {} + MERTPoint(double _m, double _b) : + x(kMinusInfinity), m(_m), b(_b), edge() {} + MERTPoint(double _x, double _m, double _b, const boost::shared_ptr<MERTPoint>& p1_, const boost::shared_ptr<MERTPoint>& p2_) : + x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {} + MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) : + x(kMinusInfinity), m(_m), b(_b), edge(&edge) {} + + double x; // x intersection with previous segment in env, or -inf if none + double m; // this line's slope + double b; // intercept with y-axis + + // we keep a pointer to the "parents" of this segment so we can reconstruct + // the Viterbi translation corresponding to this segment + boost::shared_ptr<MERTPoint> p1; + boost::shared_ptr<MERTPoint> p2; + + // only MERTPoints created from an edge using the ConvexHullWeightFunction + // have rules + // TRulePtr rule; + const Hypergraph::Edge* edge; + + // recursively recover the Viterbi translation that will result from setting + // the weights to origin + axis * x, where x is any value from this->x up + // until the next largest x in the containing ConvexHull + void ConstructTranslation(std::vector<WordID>* trans) const; + void CollectEdgesUsed(std::vector<bool>* edges_used) const; +}; + +// this is the semiring value type, +// it defines constructors for 0, 1, and the operations + and * +struct ConvexHull { + // create semiring zero + ConvexHull() : is_sorted(true) {} // zero + // for debugging: + ConvexHull(const std::vector<boost::shared_ptr<MERTPoint> >& s) : points(s) { Sort(); } + // create semiring 1 or 0 + explicit ConvexHull(int i); + ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr<MERTPoint>(point)) {} + const ConvexHull& operator+=(const ConvexHull& other); + const ConvexHull& operator*=(const ConvexHull& other); + bool IsMultiplicativeIdentity() const { + return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); } + const std::vector<boost::shared_ptr<MERTPoint> >& GetSortedSegs() const { + if (!is_sorted) Sort(); + return points; + } + size_t size() const { return points.size(); } + + private: + bool IsEdgeEnvelope() const { + return points.size() == 1 && points[0]->edge; } + void Sort() const; + mutable bool is_sorted; + mutable std::vector<boost::shared_ptr<MERTPoint> > points; +}; +std::ostream& operator<<(std::ostream& os, const ConvexHull& env); + +struct ConvexHullWeightFunction { + ConvexHullWeightFunction(const SparseVector<double>& ori, + const SparseVector<double>& dir) : origin(ori), direction(dir) {} + const ConvexHull operator()(const Hypergraph::Edge& e) const; + const SparseVector<double> origin; + const SparseVector<double> direction; +}; + +#endif diff --git a/vest/mr_vest_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc index 59d4f24f..59d4f24f 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/dpmert/mr_dpmert_generate_mapper_input.cc diff --git a/vest/mr_vest_map.cc b/dpmert/mr_dpmert_map.cc index 7d9625bc..f3304f0f 100644 --- a/vest/mr_vest_map.cc +++ b/dpmert/mr_dpmert_map.cc @@ -12,7 +12,7 @@ #include "filelib.h" #include "stringlib.h" #include "sparse_vector.h" -#include "viterbi_envelope.h" +#include "mert_geometry.h" #include "inside_outside.h" #include "error_surface.h" #include "b64tools.h" @@ -95,11 +95,11 @@ int main(int argc, char** argv) { ReadFile rf(file); HypergraphIO::ReadFromJSON(rf.stream(), &hg); } - ViterbiEnvelopeWeightFunction wf(origin, direction); - ViterbiEnvelope ve = Inside<ViterbiEnvelope, ViterbiEnvelopeWeightFunction>(hg, NULL, wf); - ErrorSurface es; + const ConvexHullWeightFunction wf(origin, direction); + const ConvexHull hull = Inside<ConvexHull, ConvexHullWeightFunction>(hg, NULL, wf); - ComputeErrorSurface(*ds[sent_id], ve, &es, metric, hg); + ErrorSurface es; + ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg); //cerr << "Viterbi envelope has " << ve.size() << " segments\n"; // cerr << "Error surface has " << es.size() << " segments\n"; string val; diff --git a/vest/mr_vest_reduce.cc b/dpmert/mr_dpmert_reduce.cc index dda61f88..dda61f88 100644 --- a/vest/mr_vest_reduce.cc +++ b/dpmert/mr_dpmert_reduce.cc diff --git a/vest/parallelize.pl b/dpmert/parallelize.pl index 7d0365cc..7d0365cc 100755 --- a/vest/parallelize.pl +++ b/dpmert/parallelize.pl diff --git a/vest/sentclient.c b/dpmert/sentclient.c index 91d994ab..91d994ab 100644 --- a/vest/sentclient.c +++ b/dpmert/sentclient.c diff --git a/vest/sentserver.c b/dpmert/sentserver.c index c20b4fa6..c20b4fa6 100644 --- a/vest/sentserver.c +++ b/dpmert/sentserver.c diff --git a/vest/sentserver.h b/dpmert/sentserver.h index cd17a546..cd17a546 100644 --- a/vest/sentserver.h +++ b/dpmert/sentserver.h diff --git a/vest/tac.pl b/dpmert/tac.pl index 9fb525c1..9fb525c1 100755 --- a/vest/tac.pl +++ b/dpmert/tac.pl diff --git a/vest/test_aer/README b/dpmert/test_aer/README index 819b2e32..819b2e32 100644 --- a/vest/test_aer/README +++ b/dpmert/test_aer/README diff --git a/vest/test_aer/cdec.ini b/dpmert/test_aer/cdec.ini index 08187848..08187848 100644 --- a/vest/test_aer/cdec.ini +++ b/dpmert/test_aer/cdec.ini diff --git a/vest/test_aer/corpus.src b/dpmert/test_aer/corpus.src index 31b23971..31b23971 100644 --- a/vest/test_aer/corpus.src +++ b/dpmert/test_aer/corpus.src diff --git a/vest/test_aer/grammar b/dpmert/test_aer/grammar index 9d857824..9d857824 100644 --- a/vest/test_aer/grammar +++ b/dpmert/test_aer/grammar diff --git a/vest/test_aer/ref.0 b/dpmert/test_aer/ref.0 index 734a9c5b..734a9c5b 100644 --- a/vest/test_aer/ref.0 +++ b/dpmert/test_aer/ref.0 diff --git a/vest/test_aer/weights b/dpmert/test_aer/weights index afc9282e..afc9282e 100644 --- a/vest/test_aer/weights +++ b/dpmert/test_aer/weights diff --git a/vest/test_data/0.json.gz b/dpmert/test_data/0.json.gz Binary files differindex 30f8dd77..30f8dd77 100644 --- a/vest/test_data/0.json.gz +++ b/dpmert/test_data/0.json.gz diff --git a/vest/test_data/1.json.gz b/dpmert/test_data/1.json.gz Binary files differindex c82cc179..c82cc179 100644 --- a/vest/test_data/1.json.gz +++ b/dpmert/test_data/1.json.gz diff --git a/vest/test_data/c2e.txt.0 b/dpmert/test_data/c2e.txt.0 index 12c4abe9..12c4abe9 100644 --- a/vest/test_data/c2e.txt.0 +++ b/dpmert/test_data/c2e.txt.0 diff --git a/vest/test_data/c2e.txt.1 b/dpmert/test_data/c2e.txt.1 index 4ac12df1..4ac12df1 100644 --- a/vest/test_data/c2e.txt.1 +++ b/dpmert/test_data/c2e.txt.1 diff --git a/vest/test_data/c2e.txt.2 b/dpmert/test_data/c2e.txt.2 index 2f67b72f..2f67b72f 100644 --- a/vest/test_data/c2e.txt.2 +++ b/dpmert/test_data/c2e.txt.2 diff --git a/vest/test_data/c2e.txt.3 b/dpmert/test_data/c2e.txt.3 index 5483cef6..5483cef6 100644 --- a/vest/test_data/c2e.txt.3 +++ b/dpmert/test_data/c2e.txt.3 diff --git a/vest/test_data/re.txt.0 b/dpmert/test_data/re.txt.0 index 86eff087..86eff087 100644 --- a/vest/test_data/re.txt.0 +++ b/dpmert/test_data/re.txt.0 diff --git a/vest/test_data/re.txt.1 b/dpmert/test_data/re.txt.1 index 2140f198..2140f198 100644 --- a/vest/test_data/re.txt.1 +++ b/dpmert/test_data/re.txt.1 diff --git a/vest/test_data/re.txt.2 b/dpmert/test_data/re.txt.2 index 94e46286..94e46286 100644 --- a/vest/test_data/re.txt.2 +++ b/dpmert/test_data/re.txt.2 diff --git a/vest/test_data/re.txt.3 b/dpmert/test_data/re.txt.3 index f87c3308..f87c3308 100644 --- a/vest/test_data/re.txt.3 +++ b/dpmert/test_data/re.txt.3 diff --git a/vest/Makefile.am b/vest/Makefile.am deleted file mode 100644 index 05fa5639..00000000 --- a/vest/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -bin_PROGRAMS = \ - mr_vest_map \ - mr_vest_reduce \ - mr_vest_generate_mapper_input \ - sentserver \ - sentclient - -if HAVE_GTEST -noinst_PROGRAMS = \ - lo_test -TESTS = lo_test -endif - -sentserver_SOURCES = sentserver.c -sentserver_LDFLAGS = -all-static -pthread - -sentclient_SOURCES = sentclient.c -sentclient_LDFLAGS = -all-static -pthread - -mr_vest_generate_mapper_input_SOURCES = mr_vest_generate_mapper_input.cc line_optimizer.cc -mr_vest_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -# nbest2hg_SOURCES = nbest2hg.cc -# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz - -mr_vest_map_SOURCES = viterbi_envelope.cc ces.cc error_surface.cc mr_vest_map.cc line_optimizer.cc -mr_vest_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -mr_vest_reduce_SOURCES = error_surface.cc ces.cc mr_vest_reduce.cc line_optimizer.cc viterbi_envelope.cc -mr_vest_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -lo_test_SOURCES = lo_test.cc ces.cc viterbi_envelope.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/vest/viterbi_envelope.cc b/vest/viterbi_envelope.cc deleted file mode 100644 index 9fcf75a0..00000000 --- a/vest/viterbi_envelope.cc +++ /dev/null @@ -1,177 +0,0 @@ -#include "viterbi_envelope.h" - -#include <cassert> -#include <limits> - -using namespace std; -using boost::shared_ptr; - -ostream& operator<<(ostream& os, const ViterbiEnvelope& env) { - os << '<'; - const vector<shared_ptr<Segment> >& segs = env.GetSortedSegs(); - for (int i = 0; i < segs.size(); ++i) - os << (i==0 ? "" : "|") << "x=" << segs[i]->x << ",b=" << segs[i]->b << ",m=" << segs[i]->m << ",p1=" << segs[i]->p1 << ",p2=" << segs[i]->p2; - return os << '>'; -} - -ViterbiEnvelope::ViterbiEnvelope(int i) { - if (i == 0) { - // do nothing - <> - } else if (i == 1) { - segs.push_back(shared_ptr<Segment>(new Segment(0, 0, 0, shared_ptr<Segment>(), shared_ptr<Segment>()))); - assert(this->IsMultiplicativeIdentity()); - } else { - cerr << "Only can create ViterbiEnvelope semiring 0 and 1 with this constructor!\n"; - abort(); - } -} - -struct SlopeCompare { - bool operator() (const shared_ptr<Segment>& a, const shared_ptr<Segment>& b) const { - return a->m < b->m; - } -}; - -const ViterbiEnvelope& ViterbiEnvelope::operator+=(const ViterbiEnvelope& other) { - if (!other.is_sorted) other.Sort(); - if (segs.empty()) { - segs = other.segs; - return *this; - } - is_sorted = false; - int j = segs.size(); - segs.resize(segs.size() + other.segs.size()); - for (int i = 0; i < other.segs.size(); ++i) - segs[j++] = other.segs[i]; - assert(j == segs.size()); - return *this; -} - -void ViterbiEnvelope::Sort() const { - sort(segs.begin(), segs.end(), SlopeCompare()); - const int k = segs.size(); - int j = 0; - for (int i = 0; i < k; ++i) { - Segment l = *segs[i]; - l.x = kMinusInfinity; - // cerr << "m=" << l.m << endl; - if (0 < j) { - if (segs[j-1]->m == l.m) { // lines are parallel - if (l.b <= segs[j-1]->b) continue; - --j; - } - while(0 < j) { - l.x = (l.b - segs[j-1]->b) / (segs[j-1]->m - l.m); - if (segs[j-1]->x < l.x) break; - --j; - } - if (0 == j) l.x = kMinusInfinity; - } - *segs[j++] = l; - } - segs.resize(j); - is_sorted = true; -} - -const ViterbiEnvelope& ViterbiEnvelope::operator*=(const ViterbiEnvelope& other) { - if (other.IsMultiplicativeIdentity()) { return *this; } - if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; } - - if (!is_sorted) Sort(); - if (!other.is_sorted) other.Sort(); - - if (this->IsEdgeEnvelope()) { -// if (other.size() > 1) -// cerr << *this << " (TIMES) " << other << endl; - shared_ptr<Segment> edge_parent = segs[0]; - const double& edge_b = edge_parent->b; - const double& edge_m = edge_parent->m; - segs.clear(); - for (int i = 0; i < other.segs.size(); ++i) { - const Segment& seg = *other.segs[i]; - const double m = seg.m + edge_m; - const double b = seg.b + edge_b; - const double& x = seg.x; // x's don't change with * - segs.push_back(shared_ptr<Segment>(new Segment(x, m, b, edge_parent, other.segs[i]))); - assert(segs.back()->p1->edge); - } -// if (other.size() > 1) -// cerr << " = " << *this << endl; - } else { - vector<shared_ptr<Segment> > new_segs; - int this_i = 0; - int other_i = 0; - const int this_size = segs.size(); - const int other_size = other.segs.size(); - double cur_x = kMinusInfinity; // moves from left to right across the - // real numbers, stopping for all inter- - // sections - double this_next_val = (1 < this_size ? segs[1]->x : kPlusInfinity); - double other_next_val = (1 < other_size ? other.segs[1]->x : kPlusInfinity); - while (this_i < this_size && other_i < other_size) { - const Segment& this_seg = *segs[this_i]; - const Segment& other_seg= *other.segs[other_i]; - const double m = this_seg.m + other_seg.m; - const double b = this_seg.b + other_seg.b; - - new_segs.push_back(shared_ptr<Segment>(new Segment(cur_x, m, b, segs[this_i], other.segs[other_i]))); - int comp = 0; - if (this_next_val < other_next_val) comp = -1; else - if (this_next_val > other_next_val) comp = 1; - if (0 == comp) { // the next values are equal, advance both indices - ++this_i; - ++other_i; - cur_x = this_next_val; // could be other_next_val (they're equal!) - this_next_val = (this_i+1 < this_size ? segs[this_i+1]->x : kPlusInfinity); - other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity); - } else { // advance the i with the lower x, update cur_x - if (-1 == comp) { - ++this_i; - cur_x = this_next_val; - this_next_val = (this_i+1 < this_size ? segs[this_i+1]->x : kPlusInfinity); - } else { - ++other_i; - cur_x = other_next_val; - other_next_val = (other_i+1 < other_size ? other.segs[other_i+1]->x : kPlusInfinity); - } - } - } - segs.swap(new_segs); - } - //cerr << "Multiply: result=" << (*this) << endl; - return *this; -} - -// recursively construct translation -void Segment::ConstructTranslation(vector<WordID>* trans) const { - const Segment* cur = this; - vector<vector<WordID> > ant_trans; - while(!cur->edge) { - ant_trans.resize(ant_trans.size() + 1); - cur->p2->ConstructTranslation(&ant_trans.back()); - cur = cur->p1.get(); - } - size_t ant_size = ant_trans.size(); - vector<const vector<WordID>*> pants(ant_size); - assert(ant_size == cur->edge->tail_nodes_.size()); - --ant_size; - for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i]; - cur->edge->rule_->ESubstitute(pants, trans); -} - -void Segment::CollectEdgesUsed(std::vector<bool>* edges_used) const { - if (edge) { - assert(edge->id_ < edges_used->size()); - (*edges_used)[edge->id_] = true; - } - if (p1) p1->CollectEdgesUsed(edges_used); - if (p2) p2->CollectEdgesUsed(edges_used); -} - -ViterbiEnvelope ViterbiEnvelopeWeightFunction::operator()(const Hypergraph::Edge& e) const { - const double m = direction.dot(e.feature_values_); - const double b = origin.dot(e.feature_values_); - Segment* seg = new Segment(m, b, e); - return ViterbiEnvelope(1, seg); -} - diff --git a/vest/viterbi_envelope.h b/vest/viterbi_envelope.h deleted file mode 100644 index 60ad82d8..00000000 --- a/vest/viterbi_envelope.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef _VITERBI_ENVELOPE_H_ -#define _VITERBI_ENVELOPE_H_ - -#include <vector> -#include <iostream> -#include <boost/shared_ptr.hpp> - -#include "hg.h" -#include "sparse_vector.h" - -static const double kMinusInfinity = -std::numeric_limits<double>::infinity(); -static const double kPlusInfinity = std::numeric_limits<double>::infinity(); - -struct Segment { - Segment() : x(), m(), b(), edge() {} - Segment(double _m, double _b) : - x(kMinusInfinity), m(_m), b(_b), edge() {} - Segment(double _x, double _m, double _b, const boost::shared_ptr<Segment>& p1_, const boost::shared_ptr<Segment>& p2_) : - x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {} - Segment(double _m, double _b, const Hypergraph::Edge& edge) : - x(kMinusInfinity), m(_m), b(_b), edge(&edge) {} - - double x; // x intersection with previous segment in env, or -inf if none - double m; // this line's slope - double b; // intercept with y-axis - - // we keep a pointer to the "parents" of this segment so we can reconstruct - // the Viterbi translation corresponding to this segment - boost::shared_ptr<Segment> p1; - boost::shared_ptr<Segment> p2; - - // only Segments created from an edge using the ViterbiEnvelopeWeightFunction - // have rules - // TRulePtr rule; - const Hypergraph::Edge* edge; - - // recursively recover the Viterbi translation that will result from setting - // the weights to origin + axis * x, where x is any value from this->x up - // until the next largest x in the containing ViterbiEnvelope - void ConstructTranslation(std::vector<WordID>* trans) const; - void CollectEdgesUsed(std::vector<bool>* edges_used) const; -}; - -// this is the semiring value type, -// it defines constructors for 0, 1, and the operations + and * -struct ViterbiEnvelope { - // create semiring zero - ViterbiEnvelope() : is_sorted(true) {} // zero - // for debugging: - ViterbiEnvelope(const std::vector<boost::shared_ptr<Segment> >& s) : segs(s) { Sort(); } - // create semiring 1 or 0 - explicit ViterbiEnvelope(int i); - ViterbiEnvelope(int n, Segment* seg) : is_sorted(true), segs(n, boost::shared_ptr<Segment>(seg)) {} - const ViterbiEnvelope& operator+=(const ViterbiEnvelope& other); - const ViterbiEnvelope& operator*=(const ViterbiEnvelope& other); - bool IsMultiplicativeIdentity() const { - return size() == 1 && (segs[0]->b == 0.0 && segs[0]->m == 0.0) && (!segs[0]->edge) && (!segs[0]->p1) && (!segs[0]->p2); } - const std::vector<boost::shared_ptr<Segment> >& GetSortedSegs() const { - if (!is_sorted) Sort(); - return segs; - } - size_t size() const { return segs.size(); } - - private: - bool IsEdgeEnvelope() const { - return segs.size() == 1 && segs[0]->edge; } - void Sort() const; - mutable bool is_sorted; - mutable std::vector<boost::shared_ptr<Segment> > segs; -}; -std::ostream& operator<<(std::ostream& os, const ViterbiEnvelope& env); - -struct ViterbiEnvelopeWeightFunction { - ViterbiEnvelopeWeightFunction(const SparseVector<double>& ori, - const SparseVector<double>& dir) : origin(ori), direction(dir) {} - ViterbiEnvelope operator()(const Hypergraph::Edge& e) const; - const SparseVector<double> origin; - const SparseVector<double> direction; -}; - -#endif |