summaryrefslogtreecommitdiff
path: root/decoder/ff_dwarf.h
diff options
context:
space:
mode:
authorchris dyer <redpony@umd.edu>2010-12-28 20:56:42 -0500
committerchris dyer <redpony@umd.edu>2010-12-28 20:56:42 -0500
commitb85986c762bc8a2a74bfe0e2eb1d88fba991d554 (patch)
treeaae7a3b2127a14b67afda54184d261b56a670ec4 /decoder/ff_dwarf.h
parent6b4b4f19f44e051e5f62bcb1243c3d199d537cc6 (diff)
incorporate dwarf features
Diffstat (limited to 'decoder/ff_dwarf.h')
-rw-r--r--decoder/ff_dwarf.h100
1 files changed, 100 insertions, 0 deletions
diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h
new file mode 100644
index 00000000..083fcc7c
--- /dev/null
+++ b/decoder/ff_dwarf.h
@@ -0,0 +1,100 @@
+#include <vector>
+#include <map>
+#include <string>
+#include "ff.h"
+#include "dwarf.h"
+#include "lattice.h"
+
+using namespace std;
+
+/* Feature function built on the Orientation and Dominance (Source|Target) models whose
+   parameters are loaded by the static read* helpers declared below. The scoring workflow
+   is described in the note above TraversalFeaturesImpl. */
+class Dwarf : public FeatureFunction {
+ public:
+ Dwarf(const std::string& param);
+ /* State-related param
+ STATE_SIZE: the number of ints
+ MAXIMUM_ALIGNMENTS: the maximum number of alignments in the states,
+ each alignment point is encoded in one int
+ (the first two bytes for source, and the remaining one for target)
+ */
+ static const int STATE_SIZE=53;
+ static const int IMPOSSIBLY_LARGE_POS = 9999999;
+ static const int MAXIMUM_ALIGNMENTS=37;
+ /* Read from file the Orientation(Source|Target) model parameter. */
+ static bool readOrientation(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos=false);
+ /* Read from file the Dominance(Source|Target) model parameter. */
+ static bool readDominance(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos=false);
+ static bool readList(const std::string& filename, std::map<WordID,int>* fw);
+ static double IntegerToDouble(int val);
+ static int DoubleToInteger(double val);
+ bool readTags(const std::string& filename, std::map<WordID,WordID>* tags);
+ bool generalizeOrientation(CountTable* table, const std::map<WordID,WordID>& tags, bool pos=false);
+ bool generalizeDominance(CountTable* table, const std::map<WordID,WordID>& tags, bool pos=false);
+ /* Split source at its last "/": the text before the separator is written to *pkey
+    and the text after it to *pidx. If source contains no "/", found is expected to
+    convert to -1, so both outputs receive the whole of source.
+    The results are copied through the pointers; merely re-pointing the by-value
+    pointer parameters at the locals would leave the caller's strings untouched.
+    A null output pointer is skipped. */
+ static void stripIndex(const string& source, string* pkey, string* pidx) {
+ if (DEBUG) cerr << " stripIndex(" << source << ")" << endl;
+ int found = source.find_last_of("/");
+ string idx = source.substr(found+1);
+ string key = source.substr(0,found);
+ if (DEBUG) cerr << " found=" << found << "," << key << "," << idx << endl;
+ if (pkey) *pkey = key;
+ if (pidx) *pidx = idx;
+ }
+
+
+ protected:
+ /* The high-level workflow is as follows:
+ 1. call *als->prepare*, which constructs the full alignment of the edge while taking into account the antecedents
+ also in this call, function words are identified. Most of the work in this call is to make sure the indexes
+ of the alignments (including the function words) are consistent with the newly created alignment
+ 2. call *als->computeOrientationSource*, *als->computeOrientationTarget*,
+ *als->computeDominanceSource*, or *als->computeDominanceTarget*
+ and pass the resulting score to either *features* or to *estimated_features*
+ 3. call *als->BorderingSFWsOnly()* and *als->BorderingTFWsOnly()*, which removes records of all function word
+ alignments except those at the borders. Note that fw alignments kept may be more than two on each side
+ for examples if there are a number of unaligned fw alignments before the leftmost alignment or the rightmost one
+ 4. call *als->simplify()*, which assigns the state of this edge (*context*). It simplifies the alignment space to
+ its most compact representation, enough to compute the unscored models. This is done by observing the surviving
+ function word alignments set by 3.
+ */
+ void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ Alignment* als;
+ /* Feature IDs set by calling FD::Convert(model's string) */
+ int oris_, oris_bo1_, oris_bo2_, orit_, orit_bo1_, orit_bo2_;
+ int oris_backward_, orit_backward_, porislr_, porisrl_, goris_, pgorislr_, pgorisrl_;
+ int pdomslr_, pdomsrl_, pgdomslr_, pgdomsrl_;
+ int doms_, doms_bo1_, doms_bo2_, domt_, domt_bo1_, domt_bo2_;
+ int tfw_count_;
+ int bdoms_;
+ int poris_count;
+ int pgoris_count;
+ int poris_nlr, poris_nrl; // maximum depth (1->from the beginning of the sentence, 2-> from the end of the sentence)
+ int pgoris_nlr, pgoris_nrl;
+ int pdoms_nlr, pdoms_nrl;
+ int pgdoms_nlr, pgdoms_nrl;
+ int* _sent_id;
+ int* _fwcount;
+ WordID kSOS;
+ WordID kEOS;
+ string sSOS;
+ string sEOS;
+ WordID kGOAL;
+ /* model's flag, if set true will invoke the model scoring */
+ bool flag_oris, flag_orit, flag_doms, flag_domt, flag_tfw_count, flag_oris_backward, flag_orit_backward, flag_bdoms;
+ bool flag_porislr, flag_porisrl, flag_goris, flag_pgorislr, flag_pgorisrl;
+ bool explicit_soseos;
+ bool flag_pdomslr, flag_pdomsrl, flag_pgdomslr, flag_pgdomsrl, flag_gdoms;
+ /* a collection of Source function words (sfw) and Target function words (tfw) */
+ std::map<WordID,int> sfw;
+ std::map<WordID,int> tfw;
+ std::map<WordID,WordID> tags;
+ /* a collection of model's parameter */
+ CountTable toris, torit, tdoms, tbdoms, tdomt, tporislr, tporisrl, tgoris, tpgorislr, tpgorisrl;
+ CountTable tpdomslr, tpdomsrl, tpgdomslr, tpgdomsrl;
+ void neighboringFWs(const Lattice& l, const int& i, const int& j, const map<WordID,int>& fw_hash, int* lfw, int* rfw);
+};
+