summaryrefslogtreecommitdiff
path: root/training/atools.cc
blob: bac7385956f95b1b55c2779f75e2fcb9367dd238 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <iostream>
#include <sstream>
#include <vector>

#include <map>
#include <boost/program_options.hpp>
#include <boost/shared_ptr.hpp>

#include "filelib.h"
#include "aligner.h"

namespace po = boost::program_options;
using namespace std;
using boost::shared_ptr;

struct Command {
  virtual ~Command() {}
  virtual string Name() const = 0;

  // returns 1 for alignment grid output [default]
  // returns 2 if Summary() should be called [for AER, etc]
  virtual int Result() const { return 1; }

  virtual bool RequiresTwoOperands() const { return true; }
  virtual void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) = 0;
  void EnsureSize(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
    x->resize(max(a.width(), b.width()), max(a.height(), b.width()));
  }
  bool Safe(const Array2D<bool>& a, int i, int j) const {
    if (i < a.width() && j < a.height())
      return a(i,j);
    else
      return false;
  }
  virtual void Summary() { assert(!"Summary should have been overridden"); }
};

// compute fmeasure, second alignment is reference, first is hyp
struct FMeasureCommand : public Command {
  FMeasureCommand() : matches(), num_predicted(), num_in_ref() {}
  int Result() const { return 2; }
  string Name() const { return "f"; }
  bool RequiresTwoOperands() const { return true; }
  void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) {
    int i_len = ref.width();
    int j_len = ref.height();
    for (int i = 0; i < i_len; ++i) {
      for (int j = 0; j < j_len; ++j) {
        if (ref(i,j)) {
          ++num_in_ref;
          if (Safe(hyp, i, j)) ++matches;
        } 
      }
    }
    for (int i = 0; i < hyp.width(); ++i)
      for (int j = 0; j < hyp.height(); ++j)
        if (hyp(i,j)) ++num_predicted;
  }
  void Summary() {
    if (num_predicted == 0 || num_in_ref == 0) {
      cerr << "Insufficient statistics to compute f-measure!\n";
      abort();
    }
    const double prec = static_cast<double>(matches) / num_predicted;
    const double rec = static_cast<double>(matches) / num_in_ref;
    cout << "P: " << prec << endl;
    cout << "R: " << rec << endl;
    const double f = (2.0 * prec * rec) / (rec + prec);
    cout << "F: " << f << endl;
  }
  int matches;
  int num_predicted;
  int num_in_ref;
};

struct ConvertCommand : public Command {
  string Name() const { return "convert"; }
  bool RequiresTwoOperands() const { return false; }
  void Apply(const Array2D<bool>& in, const Array2D<bool>&not_used, Array2D<bool>* x) {
    *x = in;
  }
};

struct InvertCommand : public Command {
  string Name() const { return "invert"; }
  bool RequiresTwoOperands() const { return false; }
  void Apply(const Array2D<bool>& in, const Array2D<bool>&not_used, Array2D<bool>* x) {
    Array2D<bool>& res = *x;
    res.resize(in.height(), in.width());
    for (int i = 0; i < in.height(); ++i)
      for (int j = 0; j < in.width(); ++j)
        res(i, j) = in(j, i);
  }
};

struct IntersectCommand : public Command {
  string Name() const { return "intersect"; }
  bool RequiresTwoOperands() const { return true; }
  void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) {
    EnsureSize(a, b, x);
    Array2D<bool>& res = *x;
    for (int i = 0; i < a.width(); ++i)
      for (int j = 0; j < a.height(); ++j)
        res(i, j) = Safe(a, i, j) && Safe(b, i, j);
  }
};

map<string, boost::shared_ptr<Command> > commands;

void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
  po::options_description opts("Configuration options");
  ostringstream os;
  os << "[REQ] Operation to perform:";
  for (map<string, boost::shared_ptr<Command> >::iterator it = commands.begin();
       it != commands.end(); ++it) {
    os << ' ' << it->first;
  }
  string cstr = os.str();
  opts.add_options()
        ("input_1,i", po::value<string>(), "[REQ] Alignment 1 file, - for STDIN")
        ("input_2,j", po::value<string>(), "[OPT] Alignment 2 file, - for STDIN")
	("command,c", po::value<string>()->default_value("convert"), cstr.c_str())
        ("help,h", "Print this help message and exit");
  po::options_description clo("Command line options");
  po::options_description dcmdline_options;
  dcmdline_options.add(opts);

  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
  po::notify(*conf);

  if (conf->count("help") || conf->count("input_1") == 0 || conf->count("command") == 0) {
    cerr << dcmdline_options << endl;
    exit(1);
  }
  const string cmd = (*conf)["command"].as<string>();
  if (commands.count(cmd) == 0) {
    cerr << "Don't understand command: " << cmd << endl;
    exit(1);
  }
  if (commands[cmd]->RequiresTwoOperands()) {
    if (conf->count("input_2") == 0) {
      cerr << "Command '" << cmd << "' requires two alignment files\n";
      exit(1);
    }
    if ((*conf)["input_1"].as<string>() == "-" && (*conf)["input_2"].as<string>() == "-") {
      cerr << "Both inputs cannot be STDIN\n";
      exit(1);
    }
  } else {
    if (conf->count("input_2") != 0) {
      cerr << "Command '" << cmd << "' requires only one alignment file\n";
      exit(1);
    }
  }
}

template<class C> static void AddCommand() {
  C* c = new C;
  commands[c->Name()].reset(c);
}

int main(int argc, char **argv) {
  AddCommand<ConvertCommand>();
  AddCommand<InvertCommand>();
  AddCommand<IntersectCommand>();
  AddCommand<FMeasureCommand>();
  po::variables_map conf;
  InitCommandLine(argc, argv, &conf);
  Command& cmd = *commands[conf["command"].as<string>()];
  boost::shared_ptr<ReadFile> rf1(new ReadFile(conf["input_1"].as<string>()));
  boost::shared_ptr<ReadFile> rf2;
  if (cmd.RequiresTwoOperands())
    rf2.reset(new ReadFile(conf["input_2"].as<string>()));
  istream* in1 = rf1->stream();
  istream* in2 = NULL;
  if (rf2) in2 = rf2->stream();
  while(*in1) {
    string line1;
    string line2;
    getline(*in1, line1);
    if (in2) {
      getline(*in2, line2);
      if ((*in1 && !*in2) || (*in2 && !*in1)) {
        cerr << "Mismatched number of lines!\n";
        exit(1);
      }
    }
    if (line1.empty() && !*in1) break;
    shared_ptr<Array2D<bool> > out(new Array2D<bool>);
    shared_ptr<Array2D<bool> > a1 = AlignerTools::ReadPharaohAlignmentGrid(line1);
    if (in2) {
      shared_ptr<Array2D<bool> > a2 = AlignerTools::ReadPharaohAlignmentGrid(line2);
      cmd.Apply(*a1, *a2, out.get());
    } else {
      Array2D<bool> dummy;
      cmd.Apply(*a1, dummy, out.get());
    }
    
    if (cmd.Result() == 1) {
      AlignerTools::SerializePharaohFormat(*out, &cout);
    }
  }
  if (cmd.Result() == 2)
    cmd.Summary();
  return 0;
}