summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-03-24 23:04:46 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-03-24 23:04:46 -0400
commit b6eede632af4fa58a6f5325ee0d059c02a898b9f (patch)
tree 4d29d1d1d700b9540af213bac32ffda96046abe1
parent 0c7e078d14dd7078ec4a5b3e77007609aec5e54c (diff)
rename aligner, add support for distinguishing translation / transliteration
-rw-r--r-- decoder/aligner.cc | 4
-rw-r--r-- decoder/ff_wordalign.cc | 1
-rw-r--r-- mteval/aer_scorer.cc | 6
-rw-r--r-- utils/Makefile.am | 2
-rw-r--r-- utils/alignment_io.cc (renamed from utils/alignment_pharaoh.cc) | 36
-rw-r--r-- utils/alignment_io.h | 42
-rw-r--r-- utils/alignment_pharaoh.h | 14
-rw-r--r-- utils/atools.cc | 8
8 files changed, 80 insertions, 33 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 53e059fb..232e022a 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -11,7 +11,7 @@
#include "sentence_metadata.h"
#include "inside_outside.h"
#include "viterbi.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
using namespace std;
@@ -300,7 +300,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
cerr << grid << endl;
}
(*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
- AlignmentPharaoh::SerializePharaohFormat(grid, out);
+ AlignmentIO::SerializePharaohFormat(grid, out);
}
};
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index 9e7c618e..decdf9bc 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -15,7 +15,6 @@
#include "factored_lexicon_helper.h"
#include "verbose.h"
-#include "alignment_pharaoh.h"
#include "stringlib.h"
#include "sentence_metadata.h"
#include "hg.h"
diff --git a/mteval/aer_scorer.cc b/mteval/aer_scorer.cc
index edd4390f..ae3192d4 100644
--- a/mteval/aer_scorer.cc
+++ b/mteval/aer_scorer.cc
@@ -5,7 +5,7 @@
#include <sstream>
#include "tdict.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
using namespace std;
@@ -85,7 +85,7 @@ AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : s
cerr << "AERScorer can only take a single reference!\n";
abort();
}
- ref_ = AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
+ ref_ = AlignmentIO::ReadPharaohAlignmentGrid(TD::GetString(refs.front()));
}
static inline bool Safe(const Array2D<bool>& a, int i, int j) {
@@ -101,7 +101,7 @@ ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const {
ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const {
boost::shared_ptr<Array2D<bool> > hyp =
- AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(shyp));
+ AlignmentIO::ReadPharaohAlignmentGrid(TD::GetString(shyp));
int m = 0;
int r = 0;
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3ea21835..2fc6ae21 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -23,7 +23,7 @@ atools_SOURCES = atools.cc
noinst_LIBRARIES = libutils.a
libutils_a_SOURCES = \
- alignment_pharaoh.cc \
+ alignment_io.cc \
b64tools.cc \
corpus_tools.cc \
dict.cc \
diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_io.cc
index 890ff565..1d923f7f 100644
--- a/utils/alignment_pharaoh.cc
+++ b/utils/alignment_io.cc
@@ -1,12 +1,10 @@
-#include "utils/alignment_pharaoh.h"
-
-#include <set>
+#include "utils/alignment_io.h"
using namespace std;
static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) {
+boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) {
int max_x = 0;
int max_y = 0;
int i = 0;
@@ -64,14 +62,36 @@ boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(con
return grid;
}
-void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
+void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) {
+ ostream& out = *o;
bool need_space = false;
for (int i = 0; i < alignment.width(); ++i)
for (int j = 0; j < alignment.height(); ++j)
if (alignment(i,j)) {
- if (need_space) (*out) << ' '; else need_space = true;
- (*out) << i << '-' << j;
+ if (need_space) out << ' '; else need_space = true;
+ out << i << '-' << j;
}
- (*out) << endl;
+ out << endl;
+}
+
+void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) {
+ ostream& out = *o;
+ bool need_space = false;
+ for (int i = 0; i < alignment.width(); ++i)
+ for (int j = 0; j < alignment.height(); ++j) {
+ const AlignmentType& aij = alignment(i,j);
+ if (aij != kNONE) {
+ if (need_space) out << ' '; else need_space = true;
+ if (aij == kTRANSLATION) {}
+ else if (aij == kTRANSLITERATION) {
+ out << 'T' << ':';
+ } else {
+ cerr << "\nUnexpected alignment point type: " << static_cast<int>(aij) << endl;
+ abort();
+ }
+ out << i << '-' << j;
+ }
+ }
+ out << endl;
}
diff --git a/utils/alignment_io.h b/utils/alignment_io.h
new file mode 100644
index 00000000..36bcecd7
--- /dev/null
+++ b/utils/alignment_io.h
@@ -0,0 +1,42 @@
+#ifndef _ALIGNMENT_IO_H_
+#define _ALIGNMENT_IO_H_
+
+#include <string>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+#include "array2d.h"
+
+struct AlignmentIO {
+ enum AlignmentType { kNONE = 0, kTRANSLATION = 1, kTRANSLITERATION = 2 };
+
+ static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
+ static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
+ static void SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, std::ostream* out);
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) {
+ os << ' ';
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10);
+ os << "\n";
+ for (int i=0; i<m.width(); ++i) {
+ os << (i%10);
+ for (int j=0; j<m.height(); ++j) {
+ switch (m(i,j)) {
+ case AlignmentIO::kNONE: os << '.'; break;
+ case AlignmentIO::kTRANSLATION: os << '*'; break;
+ case AlignmentIO::kTRANSLITERATION: os << '#'; break;
+ default: os << '?'; break;
+ }
+ }
+ os << (i%10) << "\n";
+ }
+ os << ' ';
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10);
+ os << "\n";
+ return os;
+}
+
+
+#endif
diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h
deleted file mode 100644
index d111c8bf..00000000
--- a/utils/alignment_pharaoh.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _PHARAOH_ALIGNMENT_H_
-#define _PHARAOH_ALIGNMENT_H_
-
-#include <string>
-#include <iostream>
-#include <boost/shared_ptr.hpp>
-#include "array2d.h"
-
-struct AlignmentPharaoh {
- static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
- static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
-};
-
-#endif
diff --git a/utils/atools.cc b/utils/atools.cc
index ba56dd6c..bce7822e 100644
--- a/utils/atools.cc
+++ b/utils/atools.cc
@@ -8,7 +8,7 @@
#include <boost/shared_ptr.hpp>
#include "filelib.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
namespace po = boost::program_options;
using namespace std;
@@ -348,9 +348,9 @@ int main(int argc, char **argv) {
}
if (line1.empty() && !*in1) break;
boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>);
- boost::shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1);
+ boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(line1);
if (in2) {
- boost::shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2);
+ boost::shared_ptr<Array2D<bool> > a2 = AlignmentIO::ReadPharaohAlignmentGrid(line2);
cmd.Apply(*a1, *a2, out.get());
} else {
Array2D<bool> dummy;
@@ -358,7 +358,7 @@ int main(int argc, char **argv) {
}
if (cmd.Result() == 1) {
- AlignmentPharaoh::SerializePharaohFormat(*out, &cout);
+ AlignmentIO::SerializePharaohFormat(*out, &cout);
}
}
if (cmd.Result() == 2)