summary | refs | log | tree | commit | diff
path: root/decoder/aligner.cc
diff options
context:
space:
mode:
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
commit: a53461650fbdcd3cfe7543d28af9647ac3e5e47e (patch)
tree: e812756c733b34f9c16894265204acfa9f9998a9 /decoder/aligner.cc
parent: 19b59489bb600f438ad96f04ec5d5c5b6616c9c2 (diff)
major refactor, break bad circular deps
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/aligner.cc')
-rw-r--r-- decoder/aligner.cc 74
1 files changed, 2 insertions, 72 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index b089f52e..92431be4 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -5,81 +5,11 @@
#include "sentence_metadata.h"
#include "inside_outside.h"
#include "viterbi.h"
+#include "alignment_pharaoh.h"
#include <set>
using namespace std;
-static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-
-boost::shared_ptr<Array2D<bool> > AlignerTools::ReadPharaohAlignmentGrid(const string& al) {
- int max_x = 0;
- int max_y = 0;
- int i = 0;
- size_t pos = al.rfind(" ||| ");
- if (pos != string::npos) { i = pos + 5; }
- while (i < al.size()) {
- if (al[i] == '\n' || al[i] == '\r') break;
- int x = 0;
- while(i < al.size() && is_digit(al[i])) {
- x *= 10;
- x += al[i] - '0';
- ++i;
- }
- if (x > max_x) max_x = x;
- assert(i < al.size());
- if(al[i] != '-') {
- cerr << "BAD ALIGNMENT: " << al << endl;
- abort();
- }
- ++i;
- int y = 0;
- while(i < al.size() && is_digit(al[i])) {
- y *= 10;
- y += al[i] - '0';
- ++i;
- }
- if (y > max_y) max_y = y;
- while(i < al.size() && al[i] == ' ') { ++i; }
- }
-
- boost::shared_ptr<Array2D<bool> > grid(new Array2D<bool>(max_x + 1, max_y + 1));
- i = 0;
- if (pos != string::npos) { i = pos + 5; }
- while (i < al.size()) {
- if (al[i] == '\n' || al[i] == '\r') break;
- int x = 0;
- while(i < al.size() && is_digit(al[i])) {
- x *= 10;
- x += al[i] - '0';
- ++i;
- }
- assert(i < al.size());
- assert(al[i] == '-');
- ++i;
- int y = 0;
- while(i < al.size() && is_digit(al[i])) {
- y *= 10;
- y += al[i] - '0';
- ++i;
- }
- (*grid)(x, y) = true;
- while(i < al.size() && al[i] == ' ') { ++i; }
- }
- // cerr << *grid << endl;
- return grid;
-}
-
-void AlignerTools::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
- bool need_space = false;
- for (int i = 0; i < alignment.width(); ++i)
- for (int j = 0; j < alignment.height(); ++j)
- if (alignment(i,j)) {
- if (need_space) (*out) << ' '; else need_space = true;
- (*out) << i << '-' << j;
- }
- (*out) << endl;
-}
-
// used with lexical models since they may not fully generate the
// source string
void SourceEdgeCoveragesUsingParseIndices(const Hypergraph& g,
@@ -317,6 +247,6 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
cerr << grid << endl;
}
(*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| ";
- SerializePharaohFormat(grid, out);
+ AlignmentPharaoh::SerializePharaohFormat(grid, out);
};