From 21a4b6629fedae575583f0d1e34c97dba8de2511 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Feb 2012 13:17:12 -0500 Subject: clean up alignment tools --- training/atools.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/training/atools.cc b/training/atools.cc index 42579627..82e30c38 100644 --- a/training/atools.cc +++ b/training/atools.cc @@ -8,7 +8,6 @@ #include #include "filelib.h" -#include "aligner.h" #include "alignment_pharaoh.h" namespace po = boost::program_options; @@ -79,7 +78,7 @@ struct FMeasureCommand : public Command { struct DisplayCommand : public Command { string Name() const { return "display"; } bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D¬_used, Array2D* x) { + void Apply(const Array2D& in, const Array2D&, Array2D* x) { *x = in; cout << *x << endl; } @@ -88,7 +87,7 @@ struct DisplayCommand : public Command { struct ConvertCommand : public Command { string Name() const { return "convert"; } bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D¬_used, Array2D* x) { + void Apply(const Array2D& in, const Array2D&, Array2D* x) { *x = in; } }; @@ -96,7 +95,7 @@ struct ConvertCommand : public Command { struct InvertCommand : public Command { string Name() const { return "invert"; } bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D¬_used, Array2D* x) { + void Apply(const Array2D& in, const Array2D&, Array2D* x) { Array2D& res = *x; res.resize(in.height(), in.width()); for (int i = 0; i < in.height(); ++i) @@ -275,8 +274,8 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } string cstr = os.str(); opts.add_options() - ("input_1,i", po::value(), "[REQ] Alignment 1 file, - for STDIN") - ("input_2,j", po::value(), "[OPT] Alignment 2 file, - for STDIN") + ("input_1,i", po::value(), "[REQUIRED] Alignment 1 file, - for STDIN") + ("input_2,j", po::value(), "Alignment 2 file, - for STDIN") ("command,c", po::value()->default_value("convert"), cstr.c_str()) ("help,h", "Print this help message and exit"); po::options_description clo("Command line options"); -- cgit v1.2.3 From 50105660d8c18889e8908cf3e4c583b551dc05af Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Feb 2012 13:18:59 -0500 Subject: move atools to utils directory --- training/Makefile.am | 4 - training/atools.cc | 369 --------------------------------------------------- utils/Makefile.am | 4 +- utils/atools.cc | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 372 insertions(+), 374 deletions(-) delete mode 100644 training/atools.cc create mode 100644 utils/atools.cc diff --git a/training/Makefile.am b/training/Makefile.am index 2a11ae52..d2f1ccc5 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -6,7 +6,6 @@ bin_PROGRAMS = \ mr_reduce_to_weights \ mr_optimize_reduce \ grammar_convert \ - atools \ plftools \ collapse_weights \ mpi_extract_reachable \ @@ -47,9 +46,6 @@ augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/lib test_ngram_SOURCES = test_ngram.cc test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -atools_SOURCES = atools.cc -atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz - model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz diff --git a/training/atools.cc b/training/atools.cc deleted file mode 100644 index 82e30c38..00000000 --- a/training/atools.cc +++ /dev/null @@ -1,369 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include "filelib.h" -#include "alignment_pharaoh.h" - -namespace po = boost::program_options; -using namespace std; -using boost::shared_ptr; - -struct Command { - virtual ~Command() {} - virtual string Name() const = 0; - - // returns 1 for alignment grid output [default] - // returns 2 if Summary() should be called [for AER, etc] - virtual int Result() const { return 1; } - - virtual bool RequiresTwoOperands() const { return true; } - virtual void Apply(const Array2D& a, const Array2D& b, Array2D* x) = 0; - void EnsureSize(const Array2D& a, const Array2D& b, Array2D* x) { - x->resize(max(a.width(), b.width()), max(a.height(), b.height())); - } - static bool Safe(const Array2D& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) - return a(i,j); - else - return false; - } - virtual void Summary() { assert(!"Summary should have been overridden"); } -}; - -// compute fmeasure, second alignment is reference, first is hyp -struct FMeasureCommand : public Command { - FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} - int Result() const { return 2; } - string Name() const { return "fmeasure"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& hyp, const Array2D& ref, Array2D* x) { - (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { - if (ref(i,j)) { - ++num_in_ref; - if (Safe(hyp, i, j)) ++matches; - } - } - } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) - if (hyp(i,j)) ++num_predicted; - } - void Summary() { - if (num_predicted == 0 || num_in_ref == 0) { - cerr << "Insufficient statistics to compute f-measure!\n"; - abort(); - } - const double prec = static_cast(matches) / num_predicted; - const double rec = static_cast(matches) / num_in_ref; - cout << "P: " << prec << endl; - cout << "R: " << rec << endl; - const double f = (2.0 * prec * rec) / (rec + prec); - cout << "F: " << f << endl; - } - int matches; - int num_predicted; - int num_in_ref; -}; - -struct DisplayCommand : public Command { - string Name() const { return "display"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - *x = in; - cout << *x << endl; - } -}; - -struct ConvertCommand : public Command { - string Name() const { return "convert"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - *x = in; - } -}; - -struct InvertCommand : public Command { - string Name() const { return "invert"; } - bool RequiresTwoOperands() const { return false; } - void Apply(const Array2D& in, const Array2D&, Array2D* x) { - Array2D& res = *x; - res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) - res(i, j) = in(j, i); - } -}; - -struct IntersectCommand : public Command { - string Name() const { return "intersect"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - EnsureSize(a, b, x); - Array2D& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) - res(i, j) = Safe(a, i, j) && Safe(b, i, j); - } -}; - -struct UnionCommand : public Command { - string Name() const { return "union"; } - bool RequiresTwoOperands() const { return true; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - EnsureSize(a, b, x); - Array2D& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) - res(i, j) = Safe(a, i, j) || Safe(b, i, j); - } -}; - -struct RefineCommand : public Command { - RefineCommand() { - neighbors_.push_back(make_pair(1,0)); - neighbors_.push_back(make_pair(-1,0)); - neighbors_.push_back(make_pair(0,1)); - neighbors_.push_back(make_pair(0,-1)); - } - bool RequiresTwoOperands() const { return true; } - - void Align(int i, int j) { - res_(i, j) = true; - is_i_aligned_[i] = true; - is_j_aligned_[j] = true; - } - - bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { - const int di = neighbors_[k].first; - const int dj = neighbors_[k].second; - if (Safe(res_, i + di, j + dj)) - return true; - } - return false; - } - - bool IsNeitherAligned(int i, int j) const { - return !(is_i_aligned_[i] || is_j_aligned_[j]); - } - - bool IsOneOrBothUnaligned(int i, int j) const { - return !(is_i_aligned_[i] && is_j_aligned_[j]); - } - - bool KoehnAligned(int i, int j) const { - return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); - } - - typedef bool (RefineCommand::*Predicate)(int i, int j) const; - - protected: - void InitRefine( - const Array2D& a, - const Array2D& b) { - res_.clear(); - EnsureSize(a, b, &res_); - in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); - EnsureSize(a, b, &in_); - EnsureSize(a, b, &un_); - is_i_aligned_.resize(res_.width(), false); - is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { - un_(i, j) = Safe(a, i, j) || Safe(b, i, j); - in_(i, j) = Safe(a, i, j) && Safe(b, i, j); - if (in_(i, j)) Align(i, j); - } - } - // "grow" the resulting alignment using the points in adds - // if they match the constraints determined by pred - void Grow(Predicate pred, bool idempotent, const Array2D& adds) { - if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { - if (adds(i, j) && !res_(i, j) && - (this->*pred)(i, j)) Align(i, j); - } - return; - } - set > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) - if (adds(i, j) && !res_(i, j)) - p.insert(make_pair(i, j)); - bool keep_going = !p.empty(); - while (keep_going) { - keep_going = false; - for (set >::iterator pi = p.begin(); - pi != p.end(); ++pi) { - if ((this->*pred)(pi->first, pi->second)) { - Align(pi->first, pi->second); - p.erase(pi); - keep_going = true; - } - } - } - } - Array2D res_; // refined alignment - Array2D in_; // intersection alignment - Array2D un_; // union alignment - vector is_i_aligned_; - vector is_j_aligned_; - vector > neighbors_; -}; - -struct DiagCommand : public RefineCommand { - DiagCommand() { - neighbors_.push_back(make_pair(1,1)); - neighbors_.push_back(make_pair(-1,1)); - neighbors_.push_back(make_pair(1,-1)); - neighbors_.push_back(make_pair(-1,-1)); - } -}; - -struct GDCommand : public DiagCommand { - string Name() const { return "grow-diag"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - *x = res_; - } -}; - -struct GDFCommand : public DiagCommand { - string Name() const { return "grow-diag-final"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); - Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); - *x = res_; - } -}; - -struct GDFACommand : public DiagCommand { - string Name() const { return "grow-diag-final-and"; } - void Apply(const Array2D& a, const Array2D& b, Array2D* x) { - InitRefine(a, b); - Grow(&RefineCommand::KoehnAligned, false, un_); - Grow(&RefineCommand::IsNeitherAligned, true, a); - Grow(&RefineCommand::IsNeitherAligned, true, b); - *x = res_; - } -}; - -map > commands; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - ostringstream os; - os << "[REQ] Operation to perform:"; - for (map >::iterator it = commands.begin(); - it != commands.end(); ++it) { - os << ' ' << it->first; - } - string cstr = os.str(); - opts.add_options() - ("input_1,i", po::value(), "[REQUIRED] Alignment 1 file, - for STDIN") - ("input_2,j", po::value(), "Alignment 2 file, - for STDIN") - ("command,c", po::value()->default_value("convert"), cstr.c_str()) - ("help,h", "Print this help message and exit"); - po::options_description clo("Command line options"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - po::notify(*conf); - - if (conf->count("help") || conf->count("input_1") == 0 || conf->count("command") == 0) { - cerr << dcmdline_options << endl; - exit(1); - } - const string cmd = (*conf)["command"].as(); - if (commands.count(cmd) == 0) { - cerr << "Don't understand command: " << cmd << endl; - exit(1); - } - if (commands[cmd]->RequiresTwoOperands()) { - if (conf->count("input_2") == 0) { - cerr << "Command '" << cmd << "' requires two alignment files\n"; - exit(1); - } - if ((*conf)["input_1"].as() == "-" && (*conf)["input_2"].as() == "-") { - cerr << "Both inputs cannot be STDIN\n"; - exit(1); - } - } else { - if (conf->count("input_2") != 0) { - cerr << "Command '" << cmd << "' requires only one alignment file\n"; - exit(1); - } - } -} - -template static void AddCommand() { - C* c = new C; - commands[c->Name()].reset(c); -} - -int main(int argc, char **argv) { - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - AddCommand(); - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - Command& cmd = *commands[conf["command"].as()]; - boost::shared_ptr rf1(new ReadFile(conf["input_1"].as())); - boost::shared_ptr rf2; - if (cmd.RequiresTwoOperands()) - rf2.reset(new ReadFile(conf["input_2"].as())); - istream* in1 = rf1->stream(); - istream* in2 = NULL; - if (rf2) in2 = rf2->stream(); - while(*in1) { - string line1; - string line2; - getline(*in1, line1); - if (in2) { - getline(*in2, line2); - if ((*in1 && !*in2) || (*in2 && !*in1)) { - cerr << "Mismatched number of lines!\n"; - exit(1); - } - } - if (line1.empty() && !*in1) break; - shared_ptr > out(new Array2D); - shared_ptr > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1); - if (in2) { - shared_ptr > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2); - cmd.Apply(*a1, *a2, out.get()); - } else { - Array2D dummy; - cmd.Apply(*a1, dummy, out.get()); - } - - if (cmd.Result() == 1) { - AlignmentPharaoh::SerializePharaohFormat(*out, &cout); - } - } - if (cmd.Result() == 2) - cmd.Summary(); - return 0; -} - diff --git a/utils/Makefile.am b/utils/Makefile.am index a1ea8270..6e0678de 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,5 +1,5 @@ -bin_PROGRAMS = reconstruct_weights +bin_PROGRAMS = reconstruct_weights atools noinst_PROGRAMS = ts phmt mfcr_test TESTS = ts phmt mfcr_test @@ -17,6 +17,8 @@ endif reconstruct_weights_SOURCES = reconstruct_weights.cc +atools_SOURCES = atools.cc + noinst_LIBRARIES = libutils.a libutils_a_SOURCES = \ diff --git a/utils/atools.cc b/utils/atools.cc new file mode 100644 index 00000000..c0a91731 --- /dev/null +++ b/utils/atools.cc @@ -0,0 +1,369 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include "filelib.h" +#include "alignment_pharaoh.h" + +namespace po = boost::program_options; +using namespace std; +using boost::shared_ptr; + +struct Command { + virtual ~Command() {} + virtual string Name() const = 0; + + // returns 1 for alignment grid output [default] + // returns 2 if Summary() should be called [for AER, etc] + virtual int Result() const { return 1; } + + virtual bool RequiresTwoOperands() const { return true; } + virtual void Apply(const Array2D& a, const Array2D& b, Array2D* x) = 0; + void EnsureSize(const Array2D& a, const Array2D& b, Array2D* x) { + x->resize(max(a.width(), b.width()), max(a.height(), b.height())); + } + static bool Safe(const Array2D& a, int i, int j) { + if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + return a(i,j); + else + return false; + } + virtual void Summary() { assert(!"Summary should have been overridden"); } +}; + +// compute fmeasure, second alignment is reference, first is hyp +struct FMeasureCommand : public Command { + FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} + int Result() const { return 2; } + string Name() const { return "fmeasure"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& hyp, const Array2D& ref, Array2D* x) { + (void) x; // AER just computes statistics, not an alignment + int i_len = ref.width(); + int j_len = ref.height(); + for (int i = 0; i < i_len; ++i) { + for (int j = 0; j < j_len; ++j) { + if (ref(i,j)) { + ++num_in_ref; + if (Safe(hyp, i, j)) ++matches; + } + } + } + for (int i = 0; i < hyp.width(); ++i) + for (int j = 0; j < hyp.height(); ++j) + if (hyp(i,j)) ++num_predicted; + } + void Summary() { + if (num_predicted == 0 || num_in_ref == 0) { + cerr << "Insufficient statistics to compute f-measure!\n"; + abort(); + } + const double prec = static_cast(matches) / num_predicted; + const double rec = static_cast(matches) / num_in_ref; + cout << "P: " << prec << endl; + cout << "R: " << rec << endl; + const double f = (2.0 * prec * rec) / (rec + prec); + cout << "F: " << f << endl; + } + int matches; + int num_predicted; + int num_in_ref; +}; + +struct DisplayCommand : public Command { + string Name() const { return "display"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + *x = in; + cout << *x << endl; + } +}; + +struct ConvertCommand : public Command { + string Name() const { return "convert"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + *x = in; + } +}; + +struct InvertCommand : public Command { + string Name() const { return "invert"; } + bool RequiresTwoOperands() const { return false; } + void Apply(const Array2D& in, const Array2D&, Array2D* x) { + Array2D& res = *x; + res.resize(in.height(), in.width()); + for (int i = 0; i < in.height(); ++i) + for (int j = 0; j < in.width(); ++j) + res(i, j) = in(j, i); + } +}; + +struct IntersectCommand : public Command { + string Name() const { return "intersect"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + EnsureSize(a, b, x); + Array2D& res = *x; + for (int i = 0; i < a.width(); ++i) + for (int j = 0; j < a.height(); ++j) + res(i, j) = Safe(a, i, j) && Safe(b, i, j); + } +}; + +struct UnionCommand : public Command { + string Name() const { return "union"; } + bool RequiresTwoOperands() const { return true; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + EnsureSize(a, b, x); + Array2D& res = *x; + for (int i = 0; i < res.width(); ++i) + for (int j = 0; j < res.height(); ++j) + res(i, j) = Safe(a, i, j) || Safe(b, i, j); + } +}; + +struct RefineCommand : public Command { + RefineCommand() { + neighbors_.push_back(make_pair(1,0)); + neighbors_.push_back(make_pair(-1,0)); + neighbors_.push_back(make_pair(0,1)); + neighbors_.push_back(make_pair(0,-1)); + } + bool RequiresTwoOperands() const { return true; } + + void Align(int i, int j) { + res_(i, j) = true; + is_i_aligned_[i] = true; + is_j_aligned_[j] = true; + } + + bool IsNeighborAligned(int i, int j) const { + for (int k = 0; k < neighbors_.size(); ++k) { + const int di = neighbors_[k].first; + const int dj = neighbors_[k].second; + if (Safe(res_, i + di, j + dj)) + return true; + } + return false; + } + + bool IsNeitherAligned(int i, int j) const { + return !(is_i_aligned_[i] || is_j_aligned_[j]); + } + + bool IsOneOrBothUnaligned(int i, int j) const { + return !(is_i_aligned_[i] && is_j_aligned_[j]); + } + + bool KoehnAligned(int i, int j) const { + return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); + } + + typedef bool (RefineCommand::*Predicate)(int i, int j) const; + + protected: + void InitRefine( + const Array2D& a, + const Array2D& b) { + res_.clear(); + EnsureSize(a, b, &res_); + in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); + EnsureSize(a, b, &in_); + EnsureSize(a, b, &un_); + is_i_aligned_.resize(res_.width(), false); + is_j_aligned_.resize(res_.height(), false); + for (int i = 0; i < in_.width(); ++i) + for (int j = 0; j < in_.height(); ++j) { + un_(i, j) = Safe(a, i, j) || Safe(b, i, j); + in_(i, j) = Safe(a, i, j) && Safe(b, i, j); + if (in_(i, j)) Align(i, j); + } + } + // "grow" the resulting alignment using the points in adds + // if they match the constraints determined by pred + void Grow(Predicate pred, bool idempotent, const Array2D& adds) { + if (idempotent) { + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) { + if (adds(i, j) && !res_(i, j) && + (this->*pred)(i, j)) Align(i, j); + } + return; + } + set > p; + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) + if (adds(i, j) && !res_(i, j)) + p.insert(make_pair(i, j)); + bool keep_going = !p.empty(); + while (keep_going) { + keep_going = false; + for (set >::iterator pi = p.begin(); + pi != p.end(); ++pi) { + if ((this->*pred)(pi->first, pi->second)) { + Align(pi->first, pi->second); + p.erase(pi); + keep_going = true; + } + } + } + } + Array2D res_; // refined alignment + Array2D in_; // intersection alignment + Array2D un_; // union alignment + vector is_i_aligned_; + vector is_j_aligned_; + vector > neighbors_; +}; + +struct DiagCommand : public RefineCommand { + DiagCommand() { + neighbors_.push_back(make_pair(1,1)); + neighbors_.push_back(make_pair(-1,1)); + neighbors_.push_back(make_pair(1,-1)); + neighbors_.push_back(make_pair(-1,-1)); + } +}; + +struct GDCommand : public DiagCommand { + string Name() const { return "grow-diag"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + *x = res_; + } +}; + +struct GDFCommand : public DiagCommand { + string Name() const { return "grow-diag-final"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); + *x = res_; + } +}; + +struct GDFACommand : public DiagCommand { + string Name() const { return "grow-diag-final-and"; } + void Apply(const Array2D& a, const Array2D& b, Array2D* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsNeitherAligned, true, a); + Grow(&RefineCommand::IsNeitherAligned, true, b); + *x = res_; + } +}; + +map > commands; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + ostringstream os; + os << "Operation to perform:"; + for (map >::iterator it = commands.begin(); + it != commands.end(); ++it) { + os << ' ' << it->first; + } + string cstr = os.str(); + opts.add_options() + ("input_1,i", po::value(), "[REQUIRED] Alignment 1 file, - for STDIN") + ("input_2,j", po::value(), "Alignment 2 file, - for STDIN") + ("command,c", po::value()->default_value("convert"), cstr.c_str()) + ("help,h", "Print this help message and exit"); + po::options_description clo("Command line options"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + po::notify(*conf); + + if (conf->count("help") || conf->count("input_1") == 0 || conf->count("command") == 0) { + cerr << dcmdline_options << endl; + exit(1); + } + const string cmd = (*conf)["command"].as(); + if (commands.count(cmd) == 0) { + cerr << "Don't understand command: " << cmd << endl; + exit(1); + } + if (commands[cmd]->RequiresTwoOperands()) { + if (conf->count("input_2") == 0) { + cerr << "Command '" << cmd << "' requires two alignment files\n"; + exit(1); + } + if ((*conf)["input_1"].as() == "-" && (*conf)["input_2"].as() == "-") { + cerr << "Both inputs cannot be STDIN\n"; + exit(1); + } + } else { + if (conf->count("input_2") != 0) { + cerr << "Command '" << cmd << "' requires only one alignment file\n"; + exit(1); + } + } +} + +template static void AddCommand() { + C* c = new C; + commands[c->Name()].reset(c); +} + +int main(int argc, char **argv) { + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + AddCommand(); + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + Command& cmd = *commands[conf["command"].as()]; + boost::shared_ptr rf1(new ReadFile(conf["input_1"].as())); + boost::shared_ptr rf2; + if (cmd.RequiresTwoOperands()) + rf2.reset(new ReadFile(conf["input_2"].as())); + istream* in1 = rf1->stream(); + istream* in2 = NULL; + if (rf2) in2 = rf2->stream(); + while(*in1) { + string line1; + string line2; + getline(*in1, line1); + if (in2) { + getline(*in2, line2); + if ((*in1 && !*in2) || (*in2 && !*in1)) { + cerr << "Mismatched number of lines!\n"; + exit(1); + } + } + if (line1.empty() && !*in1) break; + shared_ptr > out(new Array2D); + shared_ptr > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1); + if (in2) { + shared_ptr > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2); + cmd.Apply(*a1, *a2, out.get()); + } else { + Array2D dummy; + cmd.Apply(*a1, dummy, out.get()); + } + + if (cmd.Result() == 1) { + AlignmentPharaoh::SerializePharaohFormat(*out, &cout); + } + } + if (cmd.Result() == 2) + cmd.Summary(); + return 0; +} + -- cgit v1.2.3 From f15bbfbf105ff873e89a8bdf55e845f3ac7b030e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Feb 2012 18:31:05 +0000 Subject: better error checking --- gi/pf/guess-translits.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl index ab737121..aafec13a 100755 --- a/gi/pf/guess-translits.pl +++ b/gi/pf/guess-translits.pl @@ -28,6 +28,7 @@ while() { my %b2a; for my $ap (@as) { my ($a,$b) = split /-/, $ap; + die "BAD INPUT: $_\n" unless defined $a && defined $b; $a2b{$a}->{$b} = 1; $b2a{$b}->{$a} = 1; } -- cgit v1.2.3