summaryrefslogtreecommitdiff
path: root/decoder/ff_sample_fsa.h
blob: 74d9e7b537919ea6c63fd18a18f6e619d3661797 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#ifndef FF_SAMPLE_FSA_H
#define FF_SAMPLE_FSA_H

#include "ff_from_fsa.h"

// example: feature val = -1 * # of target words
struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
  // Help text for this feature, assembled by FeatureFunction::usage_helper.
  // The empty second argument means the feature documents no parameters.
  static std::string usage(bool param,bool verbose) {
    return FeatureFunction::usage_helper(
      "WordPenaltyFsa","","-1 per target word"
      ,param,verbose);
  }

  // param is not used.  Only Init() (inherited; not defined in this file) is
  // called.  The statements that used to follow an unconditional `return;`
  // here were unreachable and, per the original author's note, only restated
  // the defaults:
  //   set_state_bytes(0); start.clear(); h_start.clear();
  WordPenaltyFsa(std::string const& param) {
    Init();
  }

  // Score for scanning one target word w: always -1, whatever the word.
  // Neither state nor next_state is read or written by this feature (the FSA
  // interface allows them to be the same memory).
  Featval Scan1(WordID w,void const* state,void *next_state) const {
    return -1;
  }
};

// Short name for FeatureFunctionFromFsa instantiated on WordPenaltyFsa.
// NOTE(review): presumably this is the type that gets registered as an ordinary
// feature function -- confirm against ff_from_fsa.h.
typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;


// appears to be buggy right now: give it a bonus weight (-) and it overstates how many
struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
  typedef FsaFeatureFunctionBase<LongerThanPrev> Base;

  // Help text for this feature, assembled by FeatureFunction::usage_helper.
  static std::string usage(bool param,bool verbose) {
    return FeatureFunction::usage_helper(
      "LongerThanPrev",
      "",
      "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)",
      param,verbose);
  }

  // The FSA state is a single int: the length of the previously scanned word.
  // These two accessors view a raw state buffer as that int (writable / read-only).
  static inline int &state(void *st) {
    return *static_cast<int*>(st);
  }
  static inline int state(void const* st) {
    return *static_cast<int const*>(st);
  }

  // Length in chars of the string TD::Convert returns for w (strlen's size_t
  // result is narrowed to int, as the state type requires).
  static inline int wordlen(WordID w) {
    return static_cast<int>(std::strlen(TD::Convert(w)));
  }

  // Only the immediately preceding word matters.
  int markov_order() const { return 1; }

  // param is not used.  The Base constructor is given the state size
  // (sizeof(int)) and the singleton sentence built from TD::se; per the
  // original author's note it already sizes start and h_start, so only their
  // values are filled in here.  (A dead `if (0)` block that repeated this setup
  // through set_state_bytes/resize/to_state has been dropped.)
  LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) {
    Init();
    state(start.begin())=3; // sentence begin <s> counts as 3 chars
    state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
  }

  // Scans word w from the state at `from`: stores w's length into next_state
  // and returns -1 if w is longer than the previous word, else 0.
  // The previous length is read before next_state is written, so the two
  // pointers may refer to the same memory.
  Featval Scan1(WordID w,void const* from,void *next_state) const {
    int const prevlen=state(from);
    int const len=wordlen(w);
    state(next_state)=len;
    return len>prevlen ? -1 : 0;
  }
};

// similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State)
// also buggy right now: give it a bonus weight (-) and it overstates how many
struct ShorterThanPrev : public FsaTypedBase<int,ShorterThanPrev> {
  typedef FsaTypedBase<int,ShorterThanPrev> Base;

  // Help text for this feature, assembled by FeatureFunction::usage_helper.
  static std::string usage(bool param,bool verbose) {
    return FeatureFunction::usage_helper(
      "ShorterThanPrev",
      "",
      "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '</s>')",
      param,verbose);
  }

  // Length in chars of the string TD::Convert returns for w.
  static inline int wordlen(WordID w) {
    return static_cast<int>(std::strlen(TD::Convert(w)));
  }

  // param is not used.  Base arguments, in order: start state (3),
  // heuristic start state h_start (4), end phrase (singleton sentence of TD::se).
  // The 4 is an estimate: anything <4 chars is usually shorter than previous.
  ShorterThanPrev(std::string const& param)
  : Base(3,4,singleton_sentence(TD::se))
  {
    Init();
  }

  // A value-returning variant, Featval ScanT1(WordID w,int prevlen,int &len) const,
  // was sketched by the original author as an alternative to ScanT below; it is
  // not declared here.

  // Scans word w given the previous word's length prevlen.  Writes w's length
  // to the out-parameter len and, when w is strictly shorter than the previous
  // word, adds -1 for fid_ to *features.  The two leading arguments are unused.
  void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const {
    len=wordlen(w);
    if (len>=prevlen)
      return; // not shorter: nothing to record
    features->add_value(fid_,-1);
  }

};


#endif