diff options
author | Patrick Simianer <p@simianer.de> | 2015-05-13 18:03:03 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-05-13 18:03:03 +0200 |
commit | 03d47750699c15e8c55ca4610ed5682bcb279185 (patch) | |
tree | 8f1c845915974b75ab95dc32b970e1a214b51e45 | |
parent | 6c0fcee726662285e7c4cb3857ca28296f5c525c (diff) |
atools_net
-rw-r--r-- | utils/Makefile.am | 8 | ||||
-rw-r--r-- | utils/atools_net.cc | 383 |
2 files changed, 389 insertions, 2 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index c858ac7e..f057ddbe 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = reconstruct_weights atools dedup_corpus
+bin_PROGRAMS = reconstruct_weights atools atools_net dedup_corpus
 
 noinst_PROGRAMS = \
   ts \
@@ -98,6 +98,10 @@ atools_SOURCES = atools.cc
 atools_LDADD = libutils.a
 atools_LDFLAGS = $(STATIC_FLAGS)
 
+atools_net_SOURCES = atools_net.cc nn.hpp
+atools_net_LDADD = libutils.a
+atools_net_LDFLAGS = $(STATIC_FLAGS) /fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib/libnanomsg.so
+
 dedup_corpus_SOURCES = dedup_corpus.cc
 dedup_corpus_LDADD = libutils.a
 dedup_corpus_LDFLAGS = $(STATIC_FLAGS)
@@ -123,5 +127,5 @@ stringlib_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_U
 
 ################################################################
 # do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
-AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\"
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I$(top_srcdir) -DTEST_DATA=\"$(top_srcdir)/utils/test_data\" -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include -I/fast_scratch/simianer/lfpe/cppnanomsg
 ################################################################
diff --git a/utils/atools_net.cc b/utils/atools_net.cc
new file mode 100644
index 00000000..25f70bbe
--- /dev/null
+++ b/utils/atools_net.cc
@@ -0,0 +1,383 @@
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include <queue>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <boost/program_options.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "filelib.h"
+#include "alignment_io.h"
+
+#include <boost/algorithm/string.hpp>
+#include <boost/regex.hpp>
+#include <boost/algorithm/string/regex.hpp>
+
+#include <nanomsg/nn.h>
+#include <nanomsg/pair.h>
+#include "nn.hpp"
+
+namespace po = boost::program_options;
+using namespace std;
+
+// base class of the operations that can be applied to alignment grids
+struct Command {
+  virtual ~Command() {}
+  virtual string Name() const = 0;
+
+  // returns 1 for alignment grid output [default]
+  // returns 2 if Summary() should be called [for AER, etc]
+  virtual int Result() const { return 1; }
+
+  virtual bool RequiresTwoOperands() const { return true; }
+  virtual void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) = 0;
+  // resizes x to the larger width and the larger height of a and b
+  void EnsureSize(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    x->resize(max(a.width(), b.width()), max(a.height(), b.height()));
+  }
+  // bounds-checked read: positions outside of a yield false
+  static bool Safe(const Array2D<bool>& a, int i, int j) {
+    if (i >= 0 && j >= 0 && i < static_cast<int>(a.width()) && j < static_cast<int>(a.height()))
+      return a(i,j);
+    else
+      return false;
+  }
+  virtual void Summary() { assert(!"Summary should have been overridden"); }
+};
+
+// compute fmeasure, second alignment is reference, first is hyp
+struct FMeasureCommand : public Command {
+  FMeasureCommand() : matches(), num_predicted(), num_in_ref() {}
+  int Result() const { return 2; }
+  string Name() const { return "fmeasure"; }
+  bool RequiresTwoOperands() const { return true; }
+  void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) {
+    (void) x;   // AER just computes statistics, not an alignment
+    unsigned i_len = ref.width();
+    unsigned j_len = ref.height();
+    for (unsigned i = 0; i < i_len; ++i) {
+      for (unsigned j = 0; j < j_len; ++j) {
+        if (ref(i,j)) {
+          ++num_in_ref;
+          if (Safe(hyp, i, j)) ++matches;
+        }
+      }
+    }
+    for (unsigned i = 0; i < hyp.width(); ++i)
+      for (unsigned j = 0; j < hyp.height(); ++j)
+        if (hyp(i,j)) ++num_predicted;
+  }
+  void Summary() {
+    if (num_predicted == 0 || num_in_ref == 0) {
+      cerr << "Insufficient statistics to compute f-measure!\n";
+      abort();
+    }
+    const double prec = static_cast<double>(matches) / num_predicted;
+    const double rec = static_cast<double>(matches) / num_in_ref;
+    cout << "P: " << prec << endl;
+    cout << "R: " << rec << endl;
+    // without any match prec + rec is 0: report F = 0 instead of computing 0/0
+    const double f = (matches > 0) ? (2.0 * prec * rec) / (rec + prec) : 0.0;
+    cout << "F: " << f << endl;
+  }
+  int matches;
+  int num_predicted;
+  int num_in_ref;
+};
+
+struct DisplayCommand : public Command {
+  string Name() const { return "display"; }
+  bool RequiresTwoOperands() const { return false; }
+  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) {
+    *x = in;
+    cout << *x << endl;
+  }
+};
+
+struct ConvertCommand : public Command {
+  string Name() const { return "convert"; }
+  bool RequiresTwoOperands() const { return false; }
+  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) {
+    *x = in;
+  }
+};
+
+struct InvertCommand : public Command {
+  string Name() const { return "invert"; }
+  bool RequiresTwoOperands() const { return false; }
+  void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) {
+    Array2D<bool>& res = *x;
+    res.resize(in.height(), in.width());
+    for (unsigned i = 0; i < in.height(); ++i)
+      for (unsigned j = 0; j < in.width(); ++j)
+        res(i, j) = in(j, i);
+  }
+};
+
+struct IntersectCommand : public Command {
+  string Name() const { return "intersect"; }
+  bool RequiresTwoOperands() const { return true; }
+  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    EnsureSize(a, b, x);
+    Array2D<bool>& res = *x;
+    // write every cell of the result (as UnionCommand does), not only those inside a
+    for (unsigned i = 0; i < res.width(); ++i)
+      for (unsigned j = 0; j < res.height(); ++j)
+        res(i, j) = Safe(a, i, j) && Safe(b, i, j);
+  }
+};
+
+struct UnionCommand : public Command {
+  string Name() const { return "union"; }
+  bool RequiresTwoOperands() const { return true; }
+  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    EnsureSize(a, b, x);
+    Array2D<bool>& res = *x;
+    for (unsigned i = 0; i < res.width(); ++i)
+      for (unsigned j = 0; j < res.height(); ++j)
+        res(i, j) = Safe(a, i, j) || Safe(b, i, j);
+  }
+};
+
+// state and helpers shared by the grow-diag* commands below
+struct RefineCommand : public Command {
+  RefineCommand() {
+    neighbors_.push_back(make_pair(1,0));
+    neighbors_.push_back(make_pair(-1,0));
+    neighbors_.push_back(make_pair(0,1));
+    neighbors_.push_back(make_pair(0,-1));
+  }
+  bool RequiresTwoOperands() const { return true; }
+
+  void Align(unsigned i, unsigned j) {
+    res_(i, j) = true;
+    is_i_aligned_[i] = true;
+    is_j_aligned_[j] = true;
+  }
+
+  bool IsNeighborAligned(int i, int j) const {
+    for (unsigned k = 0; k < neighbors_.size(); ++k) {
+      const int di = neighbors_[k].first;
+      const int dj = neighbors_[k].second;
+      if (Safe(res_, i + di, j + dj))
+        return true;
+    }
+    return false;
+  }
+
+  bool IsNeitherAligned(int i, int j) const {
+    return !(is_i_aligned_[i] || is_j_aligned_[j]);
+  }
+
+  bool IsOneOrBothUnaligned(int i, int j) const {
+    return !(is_i_aligned_[i] && is_j_aligned_[j]);
+  }
+
+  bool KoehnAligned(int i, int j) const {
+    return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j);
+  }
+
+  typedef bool (RefineCommand::*Predicate)(int i, int j) const;
+
+ protected:
+  void InitRefine(
+      const Array2D<bool>& a,
+      const Array2D<bool>& b) {
+    res_.clear();
+    EnsureSize(a, b, &res_);
+    in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear();
+    EnsureSize(a, b, &in_);
+    EnsureSize(a, b, &un_);
+    is_i_aligned_.resize(res_.width(), false);
+    is_j_aligned_.resize(res_.height(), false);
+    for (unsigned i = 0; i < in_.width(); ++i)
+      for (unsigned j = 0; j < in_.height(); ++j) {
+        un_(i, j) = Safe(a, i, j) || Safe(b, i, j);
+        in_(i, j) = Safe(a, i, j) && Safe(b, i, j);
+        if (in_(i, j)) Align(i, j);
+      }
+  }
+  // "grow" the resulting alignment using the points in adds
+  // if they match the constraints determined by pred
+  void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) {
+    if (idempotent) {
+      for (unsigned i = 0; i < adds.width(); ++i)
+        for (unsigned j = 0; j < adds.height(); ++j) {
+          if (adds(i, j) && !res_(i, j) &&
+              (this->*pred)(i, j)) Align(i, j);
+        }
+      return;
+    }
+    set<pair<int, int> > p;
+    for (unsigned i = 0; i < adds.width(); ++i)
+      for (unsigned j = 0; j < adds.height(); ++j)
+        if (adds(i, j) && !res_(i, j))
+          p.insert(make_pair(i, j));
+    bool keep_going = !p.empty();
+    while (keep_going) {
+      keep_going = false;
+      set<pair<int, int> > added;
+      for (set<pair<int, int> >::iterator pi = p.begin(); pi != p.end(); ++pi) {
+        if ((this->*pred)(pi->first, pi->second)) {
+          Align(pi->first, pi->second);
+          added.insert(make_pair(pi->first, pi->second));
+          keep_going = true;
+        }
+      }
+      for (set<pair<int, int> >::iterator ai = added.begin(); ai != added.end(); ++ai)
+        p.erase(*ai);
+    }
+  }
+  Array2D<bool> res_;  // refined alignment
+  Array2D<bool> in_;   // intersection alignment
+  Array2D<bool> un_;   // union alignment
+  vector<bool> is_i_aligned_;
+  vector<bool> is_j_aligned_;
+  vector<pair<int,int> > neighbors_;
+};
+
+// RefineCommand whose neighborhood also contains the four diagonal offsets
+struct DiagCommand : public RefineCommand {
+  DiagCommand() {
+    neighbors_.push_back(make_pair(1,1));
+    neighbors_.push_back(make_pair(-1,1));
+    neighbors_.push_back(make_pair(1,-1));
+    neighbors_.push_back(make_pair(-1,-1));
+  }
+};
+
+struct GDCommand : public DiagCommand {
+  string Name() const { return "grow-diag"; }
+  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    InitRefine(a, b);
+    Grow(&RefineCommand::KoehnAligned, false, un_);
+    *x = res_;
+  }
+};
+
+struct GDFCommand : public DiagCommand {
+  string Name() const { return "grow-diag-final"; }
+  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    InitRefine(a, b);
+    Grow(&RefineCommand::KoehnAligned, false, un_);
+    Grow(&RefineCommand::IsOneOrBothUnaligned, true, a);
+    Grow(&RefineCommand::IsOneOrBothUnaligned, true, b);
+    *x = res_;
+  }
+};
+
+struct GDFACommand : public DiagCommand {
+  string Name() const { return "grow-diag-final-and"; }
+  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
+    InitRefine(a, b);
+    Grow(&RefineCommand::KoehnAligned, false, un_);
+    Grow(&RefineCommand::IsNeitherAligned, true, a);
+    Grow(&RefineCommand::IsNeitherAligned, true, b);
+    *x = res_;
+  }
+};
+
+// registry of the available commands, keyed by Command::Name()
+map<string, boost::shared_ptr<Command> > commands;
+
+// parses the command line into conf; prints the options and exits with status 1
+// when --help is given, and exits with status 1 after reporting an unknown command
+void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  ostringstream os;
+  os << "Operation to perform:";
+  for (map<string, boost::shared_ptr<Command> >::iterator it = commands.begin();
+       it != commands.end(); ++it) {
+    os << ' ' << it->first;
+  }
+  string cstr = os.str();
+  opts.add_options()
+        ("command,c", po::value<string>()->default_value("convert"), cstr.c_str())
+        ("sock_url,S", po::value<string>()->default_value("tcp://127.0.0.1:60665"))
+        ("help,h", "Print this help message and exit");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  po::notify(*conf);
+
+  if (conf->count("help") || conf->count("command") == 0) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+  const string cmd = (*conf)["command"].as<string>();
+  if (commands.count(cmd) == 0) {
+    cerr << "Don't understand command: " << cmd << endl;
+    exit(1);
+  }
+}
+
+// creates a command of type C and stores it in commands under its Name()
+template<class C> static void AddCommand() {
+  C* c = new C;
+  commands[c->Name()].reset(c);
+}
+
+int main(int argc, char **argv) {
+  AddCommand<ConvertCommand>();
+  AddCommand<DisplayCommand>();
+  AddCommand<InvertCommand>();
+  AddCommand<IntersectCommand>();
+  AddCommand<UnionCommand>();
+  AddCommand<GDCommand>();
+  AddCommand<GDFCommand>();
+  AddCommand<GDFACommand>();
+  AddCommand<FMeasureCommand>();
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  Command& cmd = *commands[conf["command"].as<string>()];
+
+  nn::socket sock(AF_SP, NN_PAIR);
+  const string url = conf["sock_url"].as<string>();
+  sock.bind(url.c_str());
+  int to = 100;  // receive timeout (NN_RCVTIMEO is given in milliseconds)
+  sock.setsockopt(NN_SOL_SOCKET, NN_RCVTIMEO, &to, sizeof (to));
+  const string hello = "hello";
+  sock.send(hello.c_str(), hello.size()+1, 0);
+
+  // a request has the form "<alignment> ||| <alignment>"
+  const boost::regex sep(" \\|\\|\\| ");
+  while (true) {
+    char* buf = NULL;
+    // recv() returns an int that is negative when no message arrived (e.g. timeout)
+    const int sz = sock.recv(&buf, NN_MSG, 0);
+    if (sz < 0 || !buf) continue;
+    const string in(buf, buf+sz);
+    nn::freemsg(buf);
+    vector<string> parts;
+    boost::algorithm::split_regex(parts, in, sep);
+    ostringstream os;
+    // the second alignment may only be left out for single-operand commands
+    if (parts.size() < 2 && (parts.empty() || cmd.RequiresTwoOperands())) {
+      cerr << "Malformed request, expected two alignments separated by ' ||| '" << endl;
+    } else {
+      boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>);
+      boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(parts[0]);
+      boost::shared_ptr<Array2D<bool> > a2(new Array2D<bool>);
+      if (parts.size() > 1) a2 = AlignmentIO::ReadPharaohAlignmentGrid(parts[1]);
+      if (a1 && a2) {
+        cmd.Apply(*a1, *a2, out.get());
+        // commands with Result() == 2 produce no grid: the reply stays empty
+        if (cmd.Result() == 1) AlignmentIO::SerializePharaohFormat(*out, &os);
+      }
+    }
+    // every request gets a reply; it is empty if no alignment was produced
+    const string reply = os.str();
+    sock.send(reply.c_str(), reply.size()+1, 0);
+  }
+
+  return 0;
+}
+