summary refs log tree commit diff
path: root/utils
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-03-24 23:04:46 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-03-24 23:04:46 -0400
commit 92a115b036c4fa18af14ec0cf297f1dd8f563331 (patch)
tree e03962037b059026f4d0cda56eec9d6725231ce4 /utils
parent 52c08b69c9c93e1a75fb8b175543e56276193bd5 (diff)
rename aligner, add support for distinguishing translation / transliteration
Diffstat (limited to 'utils')
-rw-r--r-- utils/Makefile.am 2
-rw-r--r-- utils/alignment_io.cc (renamed from utils/alignment_pharaoh.cc) 36
-rw-r--r-- utils/alignment_io.h 42
-rw-r--r-- utils/alignment_pharaoh.h 14
-rw-r--r-- utils/atools.cc 8
5 files changed, 75 insertions, 27 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3ea21835..2fc6ae21 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -23,7 +23,7 @@ atools_SOURCES = atools.cc
noinst_LIBRARIES = libutils.a
libutils_a_SOURCES = \
- alignment_pharaoh.cc \
+ alignment_io.cc \
b64tools.cc \
corpus_tools.cc \
dict.cc \
diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_io.cc
index 890ff565..1d923f7f 100644
--- a/utils/alignment_pharaoh.cc
+++ b/utils/alignment_io.cc
@@ -1,12 +1,10 @@
-#include "utils/alignment_pharaoh.h"
-
-#include <set>
+#include "utils/alignment_io.h"
using namespace std;
static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) {
+boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) {
int max_x = 0;
int max_y = 0;
int i = 0;
@@ -64,14 +62,36 @@ boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(con
return grid;
}
-void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
+void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) { // writes every true cell of `alignment` to *o as space-separated "i-j" pairs on a single line
+ ostream& out = *o; // bind the stream once so the body can write `out <<` instead of `(*out) <<`; o is dereferenced without a null check
bool need_space = false;
for (int i = 0; i < alignment.width(); ++i)
for (int j = 0; j < alignment.height(); ++j)
if (alignment(i,j)) {
- if (need_space) (*out) << ' '; else need_space = true;
- (*out) << i << '-' << j;
+ if (need_space) out << ' '; else need_space = true; // one space between pairs, none before the first
+ out << i << '-' << j; // i is the width() index, j the height() index
}
- (*out) << endl;
+ out << endl; // always ends the line, even when no cell was set; endl also flushes the stream
+}
+
+void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) { // writes every non-kNONE cell of `alignment` to *o as space-separated "i-j" pairs on one line, prefixing transliteration cells with "T:"
+ ostream& out = *o; // o is dereferenced without a null check
+ bool need_space = false;
+ for (int i = 0; i < alignment.width(); ++i)
+ for (int j = 0; j < alignment.height(); ++j) {
+ const AlignmentType& aij = alignment(i,j);
+ if (aij != kNONE) { // kNONE cells produce no output at all
+ if (need_space) out << ' '; else need_space = true; // one space between points, none before the first
+ if (aij == kTRANSLATION) {} // translation points are written with no prefix
+ else if (aij == kTRANSLITERATION) {
+ out << 'T' << ':'; // transliteration points come out as T:i-j
+ } else {
+ cerr << "\nUnexpected alignment point type: " << static_cast<int>(aij) << endl; // any value outside the three enumerators is reported on stderr
+ abort(); // and terminates the process; the separating space may already have been written to out
+ }
+ out << i << '-' << j; // i is the width() index, j the height() index
+ }
+ }
+ out << endl; // always ends the line, even when nothing was written; endl also flushes the stream
}
diff --git a/utils/alignment_io.h b/utils/alignment_io.h
new file mode 100644
index 00000000..36bcecd7
--- /dev/null
+++ b/utils/alignment_io.h
@@ -0,0 +1,42 @@
+#ifndef _ALIGNMENT_IO_H_
+#define _ALIGNMENT_IO_H_
+
+#include <string>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+#include "array2d.h"
+
+struct AlignmentIO { // groups static helpers for reading and writing alignment grids; holds no data members
+ enum AlignmentType { kNONE = 0, kTRANSLATION = 1, kTRANSLITERATION = 2 }; // per-cell type of an alignment point; kNONE marks a cell with no point
+
+ static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al); // presumably parses a Pharaoh-format alignment string into a bool grid - body is not fully visible here, confirm in alignment_io.cc
+ static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out); // writes each true cell as "i-j", space-separated, followed by a newline
+ static void SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, std::ostream* out); // same layout for typed grids; transliteration cells are prefixed with "T:"
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) { // prints `m` as a character grid, one output row per width() index, with index digits along all four edges
+ os << ' '; // leading blank so the column digits line up past the row-label column
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10); // top ruler: last decimal digit of each height() index
+ os << "\n";
+ for (int i=0; i<m.width(); ++i) {
+ os << (i%10); // left row label: last decimal digit of i
+ for (int j=0; j<m.height(); ++j) {
+ switch (m(i,j)) {
+ case AlignmentIO::kNONE: os << '.'; break; // no alignment point
+ case AlignmentIO::kTRANSLATION: os << '*'; break;
+ case AlignmentIO::kTRANSLITERATION: os << '#'; break;
+ default: os << '?'; break; // value outside the three enumerators; printed rather than treated as an error
+ }
+ }
+ os << (i%10) << "\n"; // the row label is repeated on the right edge
+ }
+ os << ' ';
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10); // bottom ruler mirrors the top one
+ os << "\n";
+ return os; // returns the stream so insertions can be chained
+}
+
+
+#endif
diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h
deleted file mode 100644
index d111c8bf..00000000
--- a/utils/alignment_pharaoh.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _PHARAOH_ALIGNMENT_H_
-#define _PHARAOH_ALIGNMENT_H_
-
-#include <string>
-#include <iostream>
-#include <boost/shared_ptr.hpp>
-#include "array2d.h"
-
-struct AlignmentPharaoh {
- static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
- static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
-};
-
-#endif
diff --git a/utils/atools.cc b/utils/atools.cc
index ba56dd6c..bce7822e 100644
--- a/utils/atools.cc
+++ b/utils/atools.cc
@@ -8,7 +8,7 @@
#include <boost/shared_ptr.hpp>
#include "filelib.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
namespace po = boost::program_options;
using namespace std;
@@ -348,9 +348,9 @@ int main(int argc, char **argv) {
}
if (line1.empty() && !*in1) break;
boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>);
- boost::shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1);
+ boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(line1);
if (in2) {
- boost::shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2);
+ boost::shared_ptr<Array2D<bool> > a2 = AlignmentIO::ReadPharaohAlignmentGrid(line2);
cmd.Apply(*a1, *a2, out.get());
} else {
Array2D<bool> dummy;
@@ -358,7 +358,7 @@ int main(int argc, char **argv) {
}
if (cmd.Result() == 1) {
- AlignmentPharaoh::SerializePharaohFormat(*out, &cout);
+ AlignmentIO::SerializePharaohFormat(*out, &cout);
}
}
if (cmd.Result() == 2)