summaryrefslogtreecommitdiff
path: root/src/ff_wordalign.h
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2009-12-03 16:33:55 -0500
committerChris Dyer <redpony@gmail.com>2009-12-03 16:33:55 -0500
commit671c21451542e2dd20e45b4033d44d8e8735f87b (patch)
treeb1773b077dd65b826f067a423d26f7942ce4e043 /src/ff_wordalign.h
initial check in
Diffstat (limited to 'src/ff_wordalign.h')
-rw-r--r--src/ff_wordalign.h133
1 files changed, 133 insertions, 0 deletions
diff --git a/src/ff_wordalign.h b/src/ff_wordalign.h
new file mode 100644
index 00000000..1581641c
--- /dev/null
+++ b/src/ff_wordalign.h
@@ -0,0 +1,133 @@
+#ifndef _FF_WORD_ALIGN_H_
+#define _FF_WORD_ALIGN_H_
+
+#include "ff.h"
+#include "array2d.h"
+
+class RelativeSentencePosition : public FeatureFunction {
+ public:
+ RelativeSentencePosition(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* out_context) const;
+ private:
+ const int fid_;
+};
+
+class MarkovJump : public FeatureFunction {
+ public:
+ MarkovJump(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* out_context) const;
+ private:
+ const int fid_;
+ bool individual_params_per_jumpsize_;
+ bool condition_on_flen_;
+ std::string template_;
+};
+
+class AlignerResults : public FeatureFunction {
+ public:
+ AlignerResults(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* out_context) const;
+ private:
+ int fid_;
+ std::vector<boost::shared_ptr<Array2D<bool> > > is_aligned_;
+ mutable int cur_sent_;
+ const Array2D<bool> mutable* cur_grid_;
+};
+
+#include <tr1/unordered_map>
+#include <boost/functional/hash.hpp>
+#include <cassert>
+class BlunsomSynchronousParseHack : public FeatureFunction {
+ public:
+ BlunsomSynchronousParseHack(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* out_context) const;
+ private:
+ inline bool DoesNotBelong(const void* state) const {
+ for (int i = 0; i < NumBytesContext(); ++i) {
+ if (*(static_cast<const unsigned char*>(state) + i)) return false;
+ }
+ return true;
+ }
+
+ inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const {
+ int i = 0;
+ int ind = 0;
+ while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; }
+ // std::cerr << i << " " << NumBytesContext() << std::endl;
+ assert(i != NumBytesContext());
+ assert(ind < cur_ref_->size());
+ int cur = *(static_cast<const unsigned char*>(state) + i);
+ int comp = 1;
+ while (comp < 256 && (comp & cur) == 0) { comp <<= 1; ++ind; }
+ assert(ind < cur_ref_->size());
+ assert(comp < 256);
+ do {
+ assert(ind < cur_ref_->size());
+ yield->push_back((*cur_ref_)[ind]);
+ ++ind;
+ comp <<= 1;
+ if (comp == 256) {
+ comp = 1;
+ ++i;
+ cur = *(static_cast<const unsigned char*>(state) + i);
+ }
+ } while (comp & cur);
+ }
+
+ inline void SetStateMask(int start, int end, void* state) const {
+ assert((end / 8) < NumBytesContext());
+ int i = 0;
+ int comp = 1;
+ for (int j = 0; j < start; ++j) {
+ comp <<= 1;
+ if (comp == 256) {
+ ++i;
+ comp = 1;
+ }
+ }
+ //std::cerr << "SM: " << i << "\n";
+ for (int j = start; j < end; ++j) {
+ *(static_cast<unsigned char*>(state) + i) |= comp;
+ //std::cerr << " " << comp << "\n";
+ comp <<= 1;
+ if (comp == 256) {
+ ++i;
+ comp = 1;
+ }
+ }
+ //std::cerr << " MASK: " << ((int)*(static_cast<unsigned char*>(state))) << "\n";
+ }
+
+ const int fid_;
+ mutable int cur_sent_;
+ typedef std::tr1::unordered_map<std::vector<WordID>, int, boost::hash<std::vector<WordID> > > Vec2Int;
+ mutable Vec2Int cur_map_;
+ const std::vector<WordID> mutable * cur_ref_;
+ mutable std::vector<std::vector<WordID> > refs_;
+};
+
+#endif