diff options
Diffstat (limited to 'utils')
| -rw-r--r-- | utils/Makefile.am | 8 | ||||
| -rw-r--r-- | utils/atools_net.cc | 363 | 
2 files changed, 369 insertions, 2 deletions
| diff --git a/utils/Makefile.am b/utils/Makefile.am index c858ac7e..ceffd5d6 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,4 +1,4 @@ -bin_PROGRAMS = reconstruct_weights atools dedup_corpus +bin_PROGRAMS = reconstruct_weights atools atools_net dedup_corpus  noinst_PROGRAMS = \    ts \ @@ -98,6 +98,10 @@ atools_SOURCES = atools.cc  atools_LDADD = libutils.a  atools_LDFLAGS = $(STATIC_FLAGS) +atools_net_SOURCES = atools_net.cc nn.hpp +atools_net_LDADD = libutils.a +atools_net_LDFLAGS = $(STATIC_FLAGS) /srv/postedit/lib/nanomsg-0.5-beta/lib/libnanomsg.so +  dedup_corpus_SOURCES = dedup_corpus.cc  dedup_corpus_LDADD = libutils.a  dedup_corpus_LDFLAGS = $(STATIC_FLAGS) @@ -123,5 +127,5 @@ stringlib_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_U  ################################################################  # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\" +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\" -I/srv/postedit/lib/nanomsg-0.5-beta/include -I/srv/postedit/lib/cppnanomsg  ################################################################ diff --git a/utils/atools_net.cc b/utils/atools_net.cc new file mode 100644 index 00000000..fa5dfb96 --- /dev/null +++ b/utils/atools_net.cc @@ -0,0 +1,363 @@ +#include <iostream> +#include <sstream> +#include <vector> + +#include <queue> +#include <map> +#include <boost/program_options.hpp> +#include <boost/shared_ptr.hpp> + +#include "filelib.h" +#include "alignment_io.h" + +#include <boost/algorithm/string.hpp> +#include <boost/regex.hpp> +#include <boost/algorithm/string/regex.hpp> + +#include <nanomsg/nn.h> +#include <nanomsg/pair.h> +#include "nn.hpp" + +namespace po = boost::program_options; +using namespace std; + +struct Command { +  virtual ~Command() {} +  virtual string Name() const = 0; + +  // returns 1 for alignment grid output [default] +  // returns 2 if Summary() should be called [for AER, etc] +  virtual int Result() const { return 1; } + +  virtual bool RequiresTwoOperands() const { return true; } +  virtual void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) = 0; +  void EnsureSize(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    x->resize(max(a.width(), b.width()), max(a.height(), b.height())); +  } +  static bool Safe(const Array2D<bool>& a, int i, int j) { +    if (i >= 0 && j >= 0 && i < static_cast<int>(a.width()) && j < static_cast<int>(a.height())) +      return a(i,j); +    else +      return false; +  } +  virtual void Summary() { assert(!"Summary should have been overridden"); } +}; + +// compute fmeasure, second alignment is reference, first is hyp +struct FMeasureCommand : public Command { +  FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} +  int Result() const { return 2; } +  string Name() const { return "fmeasure"; } +  bool RequiresTwoOperands() const { return true; } +  void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) { +    (void) x;   // AER just computes statistics, not an alignment +    unsigned i_len = ref.width(); +    unsigned j_len = ref.height(); +    for (unsigned i = 0; i < i_len; ++i) { +      for (unsigned j = 0; j < j_len; ++j) { +        if (ref(i,j)) { +          ++num_in_ref; +          if (Safe(hyp, i, j)) ++matches; +        } +      } +    } +    for (unsigned i = 0; i < hyp.width(); ++i) +      for (unsigned j = 0; j < hyp.height(); ++j) +        if (hyp(i,j)) ++num_predicted; +  } +  void Summary() { +    if (num_predicted == 0 || num_in_ref == 0) { +      cerr << "Insufficient statistics to compute f-measure!\n"; +      abort(); +    } +    const double prec = static_cast<double>(matches) / num_predicted; +    const double rec = static_cast<double>(matches) / num_in_ref; +    cout << "P: " << prec << endl; +    cout << "R: " << rec << endl; +    const double f = (2.0 * prec * rec) / (rec + prec); +    cout << "F: " << f << endl; +  } +  int matches; +  int num_predicted; +  int num_in_ref; +}; + +struct DisplayCommand : public Command { +  string Name() const { return "display"; } +  bool RequiresTwoOperands() const { return false; } +  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { +    *x = in; +    cout << *x << endl; +  } +}; + +struct ConvertCommand : public Command { +  string Name() const { return "convert"; } +  bool RequiresTwoOperands() const { return false; } +  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { +    *x = in; +  } +}; + +struct InvertCommand : public Command { +  string Name() const { return "invert"; } +  bool RequiresTwoOperands() const { return false; } +  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { +    Array2D<bool>& res = *x; +    res.resize(in.height(), in.width()); +    for (unsigned i = 0; i < in.height(); ++i) +      for (unsigned j = 0; j < in.width(); ++j) +        res(i, j) = in(j, i); +  } +}; + +struct IntersectCommand : public Command { +  string Name() const { return "intersect"; } +  bool RequiresTwoOperands() const { return true; } +  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    EnsureSize(a, b, x); +    Array2D<bool>& res = *x; +    for (unsigned i = 0; i < a.width(); ++i) +      for (unsigned j = 0; j < a.height(); ++j) +        res(i, j) = Safe(a, i, j) && Safe(b, i, j); +  } +}; + +struct UnionCommand : public Command { +  string Name() const { return "union"; } +  bool RequiresTwoOperands() const { return true; } +  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    EnsureSize(a, b, x); +    Array2D<bool>& res = *x; +    for (unsigned i = 0; i < res.width(); ++i) +      for (unsigned j = 0; j < res.height(); ++j) +        res(i, j) = Safe(a, i, j) || Safe(b, i, j); +  } +}; + +struct RefineCommand : public Command { +  RefineCommand() { +    neighbors_.push_back(make_pair(1,0)); +    neighbors_.push_back(make_pair(-1,0)); +    neighbors_.push_back(make_pair(0,1)); +    neighbors_.push_back(make_pair(0,-1)); +  } +  bool RequiresTwoOperands() const { return true; } + +  void Align(unsigned i, unsigned j) { +    res_(i, j) = true; +    is_i_aligned_[i] = true; +    is_j_aligned_[j] = true; +  } + +  bool IsNeighborAligned(int i, int j) const { +    for (unsigned k = 0; k < neighbors_.size(); ++k) { +      const int di = neighbors_[k].first; +      const int dj = neighbors_[k].second; +      if (Safe(res_, i + di, j + dj)) +        return true; +    } +    return false; +  } + +  bool IsNeitherAligned(int i, int j) const { +    return !(is_i_aligned_[i] || is_j_aligned_[j]); +  } + +  bool IsOneOrBothUnaligned(int i, int j) const { +    return !(is_i_aligned_[i] && is_j_aligned_[j]); +  } + +  bool KoehnAligned(int i, int j) const { +    return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); +  } + +  typedef bool (RefineCommand::*Predicate)(int i, int j) const; + + protected: +  void InitRefine( +      const Array2D<bool>& a, +      const Array2D<bool>& b) { +    res_.clear(); +    EnsureSize(a, b, &res_); +    in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); +    EnsureSize(a, b, &in_); +    EnsureSize(a, b, &un_); +    is_i_aligned_.resize(res_.width(), false); +    is_j_aligned_.resize(res_.height(), false); +    for (unsigned i = 0; i < in_.width(); ++i) +      for (unsigned j = 0; j < in_.height(); ++j) { +        un_(i, j) = Safe(a, i, j) || Safe(b, i, j); +        in_(i, j) = Safe(a, i, j) && Safe(b, i, j); +        if (in_(i, j)) Align(i, j); +    } +  } +  // "grow" the resulting alignment using the points in adds +  // if they match the constraints determined by pred +  void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) { +    if (idempotent) { +      for (unsigned i = 0; i < adds.width(); ++i) +        for (unsigned j = 0; j < adds.height(); ++j) { +          if (adds(i, j) && !res_(i, j) && +              (this->*pred)(i, j)) Align(i, j); +        } +      return; +    } +    set<pair<int, int> > p; +    for (unsigned i = 0; i < adds.width(); ++i) +      for (unsigned j = 0; j < adds.height(); ++j) +        if (adds(i, j) && !res_(i, j)) +          p.insert(make_pair(i, j)); +    bool keep_going = !p.empty(); +    while (keep_going) { +      keep_going = false; +      set<pair<int, int> > added; +      for (set<pair<int, int> >::iterator pi = p.begin(); pi != p.end(); ++pi) { +        if ((this->*pred)(pi->first, pi->second)) { +          Align(pi->first, pi->second); +          added.insert(make_pair(pi->first, pi->second)); +          keep_going = true; +        } +      } +      for (set<pair<int, int> >::iterator ai = added.begin(); ai != added.end(); ++ai) +        p.erase(*ai); +    } +  } +  Array2D<bool> res_;  // refined alignment +  Array2D<bool> in_;   // intersection alignment +  Array2D<bool> un_;   // union alignment +  vector<bool> is_i_aligned_; +  vector<bool> is_j_aligned_; +  vector<pair<int,int> > neighbors_; +}; + +struct DiagCommand : public RefineCommand { +  DiagCommand() { +    neighbors_.push_back(make_pair(1,1)); +    neighbors_.push_back(make_pair(-1,1)); +    neighbors_.push_back(make_pair(1,-1)); +    neighbors_.push_back(make_pair(-1,-1)); +  } +}; + +struct GDCommand : public DiagCommand { +  string Name() const { return "grow-diag"; } +  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    InitRefine(a, b); +    Grow(&RefineCommand::KoehnAligned, false, un_); +    *x = res_; +  } +}; + +struct GDFCommand : public DiagCommand { +  string Name() const { return "grow-diag-final"; } +  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    InitRefine(a, b); +    Grow(&RefineCommand::KoehnAligned, false, un_); +    Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); +    Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); +    *x = res_; +  } +}; + +struct GDFACommand : public DiagCommand { +  string Name() const { return "grow-diag-final-and"; } +  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { +    InitRefine(a, b); +    Grow(&RefineCommand::KoehnAligned, false, un_); +    Grow(&RefineCommand::IsNeitherAligned, true, a); +    Grow(&RefineCommand::IsNeitherAligned, true, b); +    *x = res_; +  } +}; + +map<string, boost::shared_ptr<Command> > commands; + +void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) { +  po::options_description opts("Configuration options"); +  ostringstream os; +  os << "Operation to perform:"; +  for (map<string, boost::shared_ptr<Command> >::iterator it = commands.begin(); +       it != commands.end(); ++it) { +    os << ' ' << it->first; +  } +  string cstr = os.str(); +  opts.add_options() +        ("command,c", po::value<string>()->default_value("convert"), cstr.c_str()) +        ("sock_url,S", po::value<string>()->default_value("tcp://127.0.0.1:60665")) +        ("help,h", "Print this help message and exit"); +  po::options_description clo("Command line options"); +  po::options_description dcmdline_options; +  dcmdline_options.add(opts); + +  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); +  po::notify(*conf); + +  if (conf->count("help") || conf->count("command") == 0) { +    cerr << dcmdline_options << endl; +    exit(1); +  } +  const string cmd = (*conf)["command"].as<string>(); +  if (commands.count(cmd) == 0) { +    cerr << "Don't understand command: " << cmd << endl; +    exit(1); +  } +} + +template<class C> static void AddCommand() { +  C* c = new C; +  commands[c->Name()].reset(c); +} + +int main(int argc, char **argv) { +  AddCommand<ConvertCommand>(); +  AddCommand<DisplayCommand>(); +  AddCommand<InvertCommand>(); +  AddCommand<IntersectCommand>(); +  AddCommand<UnionCommand>(); +  AddCommand<GDCommand>(); +  AddCommand<GDFCommand>(); +  AddCommand<GDFACommand>(); +  AddCommand<FMeasureCommand>(); +  po::variables_map conf; +  InitCommandLine(argc, argv, &conf); +  Command& cmd = *commands[conf["command"].as<string>()]; + +  nn::socket sock(AF_SP, NN_PAIR); +  string url = conf["sock_url"].as<string>(); +  sock.bind(url.c_str()); +  int to = 100; +  sock.setsockopt(NN_SOL_SOCKET, NN_RCVTIMEO, &to, sizeof (to)); +  string hello = "hello"; +  sock.send(hello.c_str(), hello.size()+1, 0); + +  while (true) { +    char* buf = NULL; +    size_t sz = sock.recv(&buf, NN_MSG, 0); +    if (buf) { +      const string in(buf, buf+sz); +      nn::freemsg(buf); +      if (in == "shutdown") { +        string off = "off"; +        sock.send(off.c_str(), off.size()+1, 0); +        break; +      } +      vector<string> parts; +      boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| ")); +      boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>); +      boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(parts[0]); +      boost::shared_ptr<Array2D<bool> > a2 = AlignmentIO::ReadPharaohAlignmentGrid(parts[1]); +      cmd.Apply(*a1, *a2, out.get()); +      ostringstream os; +      if (cmd.Result() == 1) { +        AlignmentIO::SerializePharaohFormat(*out, &os); +      } else { +        // error +      } +      sock.send(os.str().c_str(), os.str().size()+1, 0); +    } +  } + +  return 0; +} + | 
