summaryrefslogtreecommitdiff
path: root/src/ff_wordalign.h
blob: 1581641c66ccd843bec1705d494bdae7ebd0a926 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#ifndef _FF_WORD_ALIGN_H_
#define _FF_WORD_ALIGN_H_

#include "ff.h"
#include "array2d.h"

class RelativeSentencePosition : public FeatureFunction {
 public:
  RelativeSentencePosition(const std::string& param);
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                     const Hypergraph::Edge& edge,
                                     const std::vector<const void*>& ant_contexts,
                                     SparseVector<double>* features,
                                     SparseVector<double>* estimated_features,
                                     void* out_context) const;
 private:
  const int fid_;
};

class MarkovJump : public FeatureFunction {
 public:
  MarkovJump(const std::string& param);
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                     const Hypergraph::Edge& edge,
                                     const std::vector<const void*>& ant_contexts,
                                     SparseVector<double>* features,
                                     SparseVector<double>* estimated_features,
                                     void* out_context) const;
 private:
  const int fid_;
  bool individual_params_per_jumpsize_;
  bool condition_on_flen_;
  std::string template_;
};

class AlignerResults : public FeatureFunction {
 public:
  AlignerResults(const std::string& param);
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                     const Hypergraph::Edge& edge,
                                     const std::vector<const void*>& ant_contexts,
                                     SparseVector<double>* features,
                                     SparseVector<double>* estimated_features,
                                     void* out_context) const;
 private:
  int fid_;
  std::vector<boost::shared_ptr<Array2D<bool> > > is_aligned_;
  mutable int cur_sent_;
  const Array2D<bool> mutable* cur_grid_;
};

#include <tr1/unordered_map>
#include <boost/functional/hash.hpp>
#include <cassert>
class BlunsomSynchronousParseHack : public FeatureFunction {
 public:
  BlunsomSynchronousParseHack(const std::string& param);
 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                     const Hypergraph::Edge& edge,
                                     const std::vector<const void*>& ant_contexts,
                                     SparseVector<double>* features,
                                     SparseVector<double>* estimated_features,
                                     void* out_context) const;
 private:
  inline bool DoesNotBelong(const void* state) const {
    for (int i = 0; i < NumBytesContext(); ++i) {
      if (*(static_cast<const unsigned char*>(state) + i)) return false;
    }
    return true;
  }

  inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const {
    int i = 0;
    int ind = 0;
    while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
    // std::cerr << i << " " << NumBytesContext() << std::endl;
    assert(i != NumBytesContext());
    assert(ind < cur_ref_->size());
    int cur = *(static_cast<const unsigned char*>(state) + i);
    int comp = 1;
    while (comp < 256 && (comp & cur) == 0) { comp <<= 1; ++ind; }
    assert(ind < cur_ref_->size());
    assert(comp < 256);
    do {
      assert(ind < cur_ref_->size());
      yield->push_back((*cur_ref_)[ind]);
      ++ind;
      comp <<= 1;
      if (comp == 256) {
        comp = 1;
        ++i;
        cur = *(static_cast<const unsigned char*>(state) + i);
      }
    } while (comp & cur);
  }

  inline void SetStateMask(int start, int end, void* state) const {
    assert((end / 8) < NumBytesContext());
    int i = 0;
    int comp = 1;
    for (int j = 0; j < start; ++j) {
      comp <<= 1;
      if (comp == 256) {
        ++i;
        comp = 1;
      }
    }
    //std::cerr << "SM: " << i << "\n";
    for (int j = start; j < end; ++j) {
      *(static_cast<unsigned char*>(state) + i) |= comp;
      //std::cerr << "  " << comp << "\n";
      comp <<= 1;
      if (comp == 256) {
        ++i;
        comp = 1;
      }
    }
    //std::cerr << "   MASK: " << ((int)*(static_cast<unsigned char*>(state))) << "\n";
  }

  const int fid_;
  mutable int cur_sent_;
  typedef std::tr1::unordered_map<std::vector<WordID>, int, boost::hash<std::vector<WordID> > > Vec2Int;
  mutable Vec2Int cur_map_;
  const std::vector<WordID> mutable * cur_ref_;
  mutable std::vector<std::vector<WordID> > refs_;
};

#endif