diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-24 23:04:46 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-03-24 23:04:46 -0400 |
commit | b6eede632af4fa58a6f5325ee0d059c02a898b9f (patch) | |
tree | 4d29d1d1d700b9540af213bac32ffda96046abe1 | |
parent | 0c7e078d14dd7078ec4a5b3e77007609aec5e54c (diff) |
rename aligner, add support for distinguishing translation / transliteration
-rw-r--r-- | decoder/aligner.cc | 4 | ||||
-rw-r--r-- | decoder/ff_wordalign.cc | 1 | ||||
-rw-r--r-- | mteval/aer_scorer.cc | 6 | ||||
-rw-r--r-- | utils/Makefile.am | 2 | ||||
-rw-r--r-- | utils/alignment_io.cc (renamed from utils/alignment_pharaoh.cc) | 36 | ||||
-rw-r--r-- | utils/alignment_io.h | 42 | ||||
-rw-r--r-- | utils/alignment_pharaoh.h | 14 | ||||
-rw-r--r-- | utils/atools.cc | 8 |
8 files changed, 80 insertions, 33 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 53e059fb..232e022a 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -11,7 +11,7 @@ #include "sentence_metadata.h" #include "inside_outside.h" #include "viterbi.h" -#include "alignment_pharaoh.h" +#include "alignment_io.h" using namespace std; @@ -300,7 +300,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, cerr << grid << endl; } (*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| "; - AlignmentPharaoh::SerializePharaohFormat(grid, out); + AlignmentIO::SerializePharaohFormat(grid, out); } }; diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index 9e7c618e..decdf9bc 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -15,7 +15,6 @@ #include "factored_lexicon_helper.h" #include "verbose.h" -#include "alignment_pharaoh.h" #include "stringlib.h" #include "sentence_metadata.h" #include "hg.h" diff --git a/mteval/aer_scorer.cc b/mteval/aer_scorer.cc index edd4390f..ae3192d4 100644 --- a/mteval/aer_scorer.cc +++ b/mteval/aer_scorer.cc @@ -5,7 +5,7 @@ #include <sstream> #include "tdict.h" -#include "alignment_pharaoh.h" +#include "alignment_io.h" using namespace std; @@ -85,7 +85,7 @@ AERScorer::AERScorer(const vector<vector<WordID> >& refs, const string& src) : s cerr << "AERScorer can only take a single reference!\n"; abort(); } - ref_ = AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(refs.front())); + ref_ = AlignmentIO::ReadPharaohAlignmentGrid(TD::GetString(refs.front())); } static inline bool Safe(const Array2D<bool>& a, int i, int j) { @@ -101,7 +101,7 @@ ScoreP AERScorer::ScoreCCandidate(const vector<WordID>& shyp) const { ScoreP AERScorer::ScoreCandidate(const vector<WordID>& shyp) const { boost::shared_ptr<Array2D<bool> > hyp = - AlignmentPharaoh::ReadPharaohAlignmentGrid(TD::GetString(shyp)); + AlignmentIO::ReadPharaohAlignmentGrid(TD::GetString(shyp)); int m = 0; int r = 0; diff --git a/utils/Makefile.am b/utils/Makefile.am index 3ea21835..2fc6ae21 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -23,7 +23,7 @@ atools_SOURCES = atools.cc noinst_LIBRARIES = libutils.a libutils_a_SOURCES = \ - alignment_pharaoh.cc \ + alignment_io.cc \ b64tools.cc \ corpus_tools.cc \ dict.cc \ diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_io.cc index 890ff565..1d923f7f 100644 --- a/utils/alignment_pharaoh.cc +++ b/utils/alignment_io.cc @@ -1,12 +1,10 @@ -#include "utils/alignment_pharaoh.h" - -#include <set> +#include "utils/alignment_io.h" using namespace std; static bool is_digit(char x) { return x >= '0' && x <= '9'; } -boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) { +boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) { int max_x = 0; int max_y = 0; int i = 0; @@ -64,14 +62,36 @@ boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(con return grid; } -void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) { +void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) { + ostream& out = *o; bool need_space = false; for (int i = 0; i < alignment.width(); ++i) for (int j = 0; j < alignment.height(); ++j) if (alignment(i,j)) { - if (need_space) (*out) << ' '; else need_space = true; - (*out) << i << '-' << j; + if (need_space) out << ' '; else need_space = true; + out << i << '-' << j; } - (*out) << endl; + out << endl; +} + +void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) { + ostream& out = *o; + bool need_space = false; + for (int i = 0; i < alignment.width(); ++i) + for (int j = 0; j < alignment.height(); ++j) { + const AlignmentType& aij = alignment(i,j); + if (aij != kNONE) { + if (need_space) out << ' '; else need_space = true; + if (aij == kTRANSLATION) {} + else if (aij == kTRANSLITERATION) { + out << 'T' << ':'; + } else { + cerr << "\nUnexpected alignment point type: " << static_cast<int>(aij) << endl; + abort(); + } + out << i << '-' << j; + } + } + out << endl; } diff --git a/utils/alignment_io.h b/utils/alignment_io.h new file mode 100644 index 00000000..36bcecd7 --- /dev/null +++ b/utils/alignment_io.h @@ -0,0 +1,42 @@ +#ifndef _ALIGNMENT_IO_H_ +#define _ALIGNMENT_IO_H_ + +#include <string> +#include <iostream> +#include <boost/shared_ptr.hpp> +#include "array2d.h" + +struct AlignmentIO { + enum AlignmentType { kNONE = 0, kTRANSLATION = 1, kTRANSLITERATION = 2 }; + + static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al); + static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out); + static void SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, std::ostream* out); +}; + +inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) { + os << ' '; + for (int j=0; j<m.height(); ++j) + os << (j%10); + os << "\n"; + for (int i=0; i<m.width(); ++i) { + os << (i%10); + for (int j=0; j<m.height(); ++j) { + switch (m(i,j)) { + case AlignmentIO::kNONE: os << '.'; break; + case AlignmentIO::kTRANSLATION: os << '*'; break; + case AlignmentIO::kTRANSLITERATION: os << '#'; break; + default: os << '?'; break; + } + } + os << (i%10) << "\n"; + } + os << ' '; + for (int j=0; j<m.height(); ++j) + os << (j%10); + os << "\n"; + return os; +} + + +#endif diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h deleted file mode 100644 index d111c8bf..00000000 --- a/utils/alignment_pharaoh.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _PHARAOH_ALIGNMENT_H_ -#define _PHARAOH_ALIGNMENT_H_ - -#include <string> -#include <iostream> -#include <boost/shared_ptr.hpp> -#include "array2d.h" - -struct AlignmentPharaoh { - static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al); - static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out); -}; - -#endif diff --git a/utils/atools.cc b/utils/atools.cc index ba56dd6c..bce7822e 100644 --- a/utils/atools.cc +++ b/utils/atools.cc @@ -8,7 +8,7 @@ #include <boost/shared_ptr.hpp> #include "filelib.h" -#include "alignment_pharaoh.h" +#include "alignment_io.h" namespace po = boost::program_options; using namespace std; @@ -348,9 +348,9 @@ int main(int argc, char **argv) { } if (line1.empty() && !*in1) break; boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>); - boost::shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1); + boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(line1); if (in2) { - boost::shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2); + boost::shared_ptr<Array2D<bool> > a2 = AlignmentIO::ReadPharaohAlignmentGrid(line2); cmd.Apply(*a1, *a2, out.get()); } else { Array2D<bool> dummy; @@ -358,7 +358,7 @@ int main(int argc, char **argv) { } if (cmd.Result() == 1) { - AlignmentPharaoh::SerializePharaohFormat(*out, &cout); + AlignmentIO::SerializePharaohFormat(*out, &cout); } } if (cmd.Result() == 2) |