diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | decoder/bottom_up_parser.cc | 19 | ||||
| -rw-r--r-- | decoder/cdec_ff.cc | 10 | ||||
| -rw-r--r-- | decoder/ff_csplit.cc | 2 | ||||
| -rw-r--r-- | decoder/ff_tagger.cc | 22 | ||||
| -rw-r--r-- | decoder/ff_tagger.h | 12 | ||||
| -rw-r--r-- | decoder/ff_wordalign.cc | 98 | ||||
| -rw-r--r-- | decoder/ff_wordalign.h | 35 | ||||
| -rw-r--r-- | decoder/tagger.cc | 2 | ||||
| -rw-r--r-- | tests/system_tests/controlled_synparse/gold.statistics | 6 | ||||
| -rw-r--r-- | tests/system_tests/controlled_synparse/input.txt | 4 | ||||
| -rw-r--r-- | tests/system_tests/tagger/cdec.ini | 6 | ||||
| -rw-r--r-- | tests/system_tests/tagger/gold.statistics | 12 | ||||
| -rw-r--r-- | tests/system_tests/tagger/gold.stdout | 2 | ||||
| -rw-r--r-- | tests/system_tests/tagger/input.txt | 2 | ||||
| -rw-r--r-- | tests/system_tests/tagger/weights | 54 | ||||
| -rw-r--r-- | tests/system_tests/unsup-align/cdec.ini | 2 | ||||
| -rw-r--r-- | tests/system_tests/unsup-align/weights | 5 | ||||
| -rwxr-xr-x | word-aligner/aligner.pl | 11 | 
19 files changed, 142 insertions, 163 deletions
| @@ -1,3 +1,4 @@ +klm/lm/build_binary  extools/extractor_monolingual  gi/posterior-regularisation/prjava/lib/*.jar  klm/lm/libklm.a diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 9504419c..aecf1cfa 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -14,20 +14,6 @@  using namespace std; -struct ParserStats { -  ParserStats() : active_items(), passive_items() {} -  void Reset() { active_items=0; passive_items=0; } -  void Report() { -    if (!SILENT) cerr << "  ACTIVE ITEMS: " << active_items << "\tPASSIVE ITEMS: " << passive_items << endl; -  } -  int active_items; -  int passive_items; -  void NotifyActive(int , int ) { ++active_items; } -  void NotifyPassive(int , int ) { ++passive_items; } -}; - -ParserStats stats; -  class ActiveChart;  class PassiveChart {   public: @@ -90,7 +76,6 @@ class ActiveChart {      void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const {        const GrammarIter* ni = gptr_->Extend(symbol);        if (ni) { -        stats.NotifyActive(-1,-1);  // TRACKING STATS          out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost));        }      } @@ -98,7 +83,6 @@ class ActiveChart {        int symbol = hg->nodes_[node_index].cat_;        const GrammarIter* ni = gptr_->Extend(symbol);        if (!ni) return; -      stats.NotifyActive(-1,-1);  // TRACKING STATS        Hypergraph::TailNodeVector na(ant_nodes_.size() + 1);        for (int i = 0; i < ant_nodes_.size(); ++i)          na[i] = ant_nodes_[i]; @@ -181,7 +165,6 @@ void PassiveChart::ApplyRule(const int i,                               const TRulePtr& r,                               const Hypergraph::TailNodeVector& ant_nodes,                               const float lattice_cost) { -  stats.NotifyPassive(i,j);  // TRACKING STATS    Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes);    new_edge->prev_i_ = r->prev_i;    new_edge->prev_j_ = r->prev_j; @@ -299,9 +282,7 @@ ExhaustiveBottomUpParser::ExhaustiveBottomUpParser(  bool ExhaustiveBottomUpParser::Parse(const Lattice& input,                                       Hypergraph* forest) const { -  stats.Reset();    PassiveChart chart(goal_sym_, grammars_, input, forest);    const bool result = chart.Parse(); -  stats.Report();    return result;  } diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 729d1214..75591af8 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -53,15 +53,15 @@ void register_feature_functions() {    ff_registry.Register("LexNullJump", new FFFactory<LexNullJump>);    ff_registry.Register("NewJump", new FFFactory<NewJump>);    ff_registry.Register("SourceBigram", new FFFactory<SourceBigram>); +  ff_registry.Register("Fertility", new FFFactory<Fertility>);    ff_registry.Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>); -  ff_registry.Register("AlignerResults", new FFFactory<AlignerResults>);    ff_registry.Register("CSplit_BasicFeatures", new FFFactory<BasicCSplitFeatures>);    ff_registry.Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>); -  ff_registry.Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>); -  ff_registry.Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>); -  ff_registry.Register("OutputIdentity", new FFFactory<OutputIdentity>); +  ff_registry.Register("Tagger_BigramIndicator", new FFFactory<Tagger_BigramIndicator>); +  ff_registry.Register("LexicalPairIndicator", new FFFactory<LexicalPairIndicator>); +  ff_registry.Register("OutputIndicator", new FFFactory<OutputIndicator>);    ff_registry.Register("IdentityCycleDetector", new FFFactory<IdentityCycleDetector>); -  ff_registry.Register("InputIdentity", new FFFactory<InputIdentity>); +  ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>);    ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);    ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);    ff_registry.Register("WordSet", new FFFactory<WordSet>); diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index f267f8e8..1485009b 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -208,9 +208,9 @@ void ReverseCharLMCSplitFeature::TraversalFeaturesImpl(    if (edge.rule_->EWords() != 1) return;    const double lpp = pimpl_->LeftPhonotacticProb(smeta.GetSourceLattice(), edge.i_);    features->set_value(fid_, lpp); -#if 0    WordID neighbor_word = 0;    const WordID word = edge.rule_->e_[1]; +#if 0    if (chars > 4 && (sword[0] == 's' || sword[0] == 'n')) {      neighbor_word = TD::Convert(string(&sword[1]));    } diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index 21d0f812..46c85cf3 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -8,10 +8,10 @@  using namespace std; -Tagger_BigramIdentity::Tagger_BigramIdentity(const std::string& param) : +Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) :    FeatureFunction(sizeof(WordID)) {} -void Tagger_BigramIdentity::FireFeature(const WordID& left, +void Tagger_BigramIndicator::FireFeature(const WordID& left,                                   const WordID& right,                                   SparseVector<double>* features) const {    int& fid = fmap_[left][right]; @@ -30,7 +30,7 @@ void Tagger_BigramIdentity::FireFeature(const WordID& left,    features->set_value(fid, 1.0);  } -void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void Tagger_BigramIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features, @@ -53,18 +53,18 @@ void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,    }  } -void LexicalPairIdentity::PrepareForInput(const SentenceMetadata& smeta) { +void LexicalPairIndicator::PrepareForInput(const SentenceMetadata& smeta) {    lexmap_->PrepareForInput(smeta);  } -LexicalPairIdentity::LexicalPairIdentity(const std::string& param) { +LexicalPairIndicator::LexicalPairIndicator(const std::string& param) {    name_ = "Id";    if (param.size()) {      // name corpus.f emap.txt      vector<string> params;      SplitOnWhitespace(param, ¶ms);      if (params.size() != 3) { -      cerr << "LexicalPairIdentity takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n"; +      cerr << "LexicalPairIndicator takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n";        cerr << " * may be used for corpus.src.txt or trgmap.txt to use surface forms\n";        cerr << " Received: " << param << endl;        abort(); @@ -76,7 +76,7 @@ LexicalPairIdentity::LexicalPairIdentity(const std::string& param) {    }  } -void LexicalPairIdentity::FireFeature(WordID src, +void LexicalPairIndicator::FireFeature(WordID src,                                        WordID trg,                                        SparseVector<double>* features) const {    int& fid = fmap_[src][trg]; @@ -88,7 +88,7 @@ void LexicalPairIdentity::FireFeature(WordID src,    features->set_value(fid, 1.0);  } -void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void LexicalPairIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features, @@ -105,9 +105,9 @@ void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta,    }  } -OutputIdentity::OutputIdentity(const std::string& param) {} +OutputIndicator::OutputIndicator(const std::string& param) {} -void OutputIdentity::FireFeature(WordID trg, +void OutputIndicator::FireFeature(WordID trg,                                   SparseVector<double>* features) const {    int& fid = fmap_[trg];    if (!fid) { @@ -125,7 +125,7 @@ void OutputIdentity::FireFeature(WordID trg,    features->set_value(fid, 1.0);  } -void OutputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void OutputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features, diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index 6adee5ab..3066866a 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -13,9 +13,9 @@ typedef std::map<WordID, Class2FID> Class2Class2FID;  // the sequence unfolds from left to right, which means it doesn't  // have to split states based on left context.  // fires unigram features as well -class Tagger_BigramIdentity : public FeatureFunction { +class Tagger_BigramIndicator : public FeatureFunction {   public: -  Tagger_BigramIdentity(const std::string& param); +  Tagger_BigramIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, @@ -32,9 +32,9 @@ class Tagger_BigramIdentity : public FeatureFunction {  // for each pair of symbols cooccuring in a lexicalized rule, fire  // a feature (mostly used for tagging, but could be used for any model) -class LexicalPairIdentity : public FeatureFunction { +class LexicalPairIndicator : public FeatureFunction {   public: -  LexicalPairIdentity(const std::string& param); +  LexicalPairIndicator(const std::string& param);    virtual void PrepareForInput(const SentenceMetadata& smeta);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, @@ -53,9 +53,9 @@ class LexicalPairIdentity : public FeatureFunction {  }; -class OutputIdentity : public FeatureFunction { +class OutputIndicator : public FeatureFunction {   public: -  OutputIdentity(const std::string& param); +  OutputIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index ef3310b4..cdb8662a 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -6,6 +6,7 @@  #include <sstream>  #include <string>  #include <cmath> +#include <bitset>  #include <tr1/unordered_map>  #include <boost/tuple/tuple.hpp> @@ -443,58 +444,6 @@ void LexicalTranslationTrigger::TraversalFeaturesImpl(const SentenceMetadata& sm    }  } -// state: src word used, number of trg words generated -AlignerResults::AlignerResults(const std::string& param) : -    cur_sent_(-1), -    cur_grid_(NULL) { -  vector<string> argv; -  int argc = SplitOnWhitespace(param, &argv); -  if (argc != 2) { -    cerr << "Required format: AlignerResults [FeatureName] [file.pharaoh]\n"; -    exit(1); -  } -  cerr << "  feature: " << argv[0] << "\talignments: " << argv[1] << endl; -  fid_ = FD::Convert(argv[0]); -  ReadFile rf(argv[1]); -  istream& in = *rf.stream(); int lc = 0; -  while(in) { -    string line; -    getline(in, line); -    if (!in) break; -    ++lc; -    is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line)); -  } -  cerr << "  Loaded " << lc << " refs\n"; -} - -void AlignerResults::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                           const Hypergraph::Edge& edge, -                                           const vector<const void*>& /* ant_states */, -                                           SparseVector<double>* features, -                                           SparseVector<double>* /* estimated_features */, -                                           void* /* state */) const { -  if (edge.i_ == -1 || edge.prev_i_ == -1) -    return; - -  if (cur_sent_ != smeta.GetSentenceID()) { -    assert(smeta.HasReference()); -    cur_sent_ = smeta.GetSentenceID(); -    assert(cur_sent_ < is_aligned_.size()); -    cur_grid_ = is_aligned_[cur_sent_].get(); -  } - -  //cerr << edge.rule_->AsString() << endl; - -  int j = edge.i_;        // source side (f) -  int i = edge.prev_i_;   // target side (e) -  if (j < cur_grid_->height() && i < cur_grid_->width() && (*cur_grid_)(i, j)) { -//    if (edge.rule_->e_[0] == smeta.GetReference()[i][0].label) { -      features->set_value(fid_, 1.0); -//      cerr << edge.rule_->AsString() << "   (" << i << "," << j << ")\n"; -//    } -  } -} -  BlunsomSynchronousParseHack::BlunsomSynchronousParseHack(const string& param) :    FeatureFunction((100 / 8) + 1), fid_(FD::Convert("NotRef")), cur_sent_(-1) {    ReadFile rf(param); @@ -618,10 +567,10 @@ void IdentityCycleDetector::TraversalFeaturesImpl(const SentenceMetadata& smeta,  } -InputIdentity::InputIdentity(const std::string& param) {} +InputIndicator::InputIndicator(const std::string& param) {} -void InputIdentity::FireFeature(WordID src, -                                SparseVector<double>* features) const { +void InputIndicator::FireFeature(WordID src, +                                 SparseVector<double>* features) const {    int& fid = fmap_[src];    if (!fid) {      static map<WordID, WordID> escape; @@ -638,7 +587,7 @@ void InputIdentity::FireFeature(WordID src,    features->set_value(fid, 1.0);  } -void InputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void InputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features, @@ -770,3 +719,40 @@ void WordPairFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,    }  } +struct PathFertility { +  unsigned char null_fertility; +  unsigned char index_fertility[255]; +  PathFertility& operator+=(const PathFertility& rhs) { +    null_fertility += rhs.null_fertility; +    for (int i = 0; i < 255; ++i) +      index_fertility[i] += rhs.index_fertility[i]; +    return *this; +  } +}; + +Fertility::Fertility(const string& config) : +    FeatureFunction(sizeof(PathFertility)) {} + +void Fertility::TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                      const Hypergraph::Edge& edge, +                                      const std::vector<const void*>& ant_contexts, +                                      SparseVector<double>* features, +                                      SparseVector<double>* estimated_features, +                                      void* context) const { +  PathFertility& out_fert = *static_cast<PathFertility*>(context); +  if (edge.Arity() == 0) { +    if (edge.i_ < 0) { +      out_fert.null_fertility = 1; +    } else { +      out_fert.index_fertility[edge.i_] = 1; +    } +  } else if (edge.Arity() == 2) { +    const PathFertility left = *static_cast<const PathFertility*>(ant_contexts[0]); +    const PathFertility right = *static_cast<const PathFertility*>(ant_contexts[1]); +    out_fert += left; +    out_fert += right; +  } else if (edge.Arity() == 1) { +    out_fert += *static_cast<const PathFertility*>(ant_contexts[0]); +  } +} + diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index 8035000e..d7a2dda8 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -124,23 +124,6 @@ class LexicalTranslationTrigger : public FeatureFunction {    std::vector<std::vector<WordID> > triggers_;  }; -class AlignerResults : public FeatureFunction { - public: -  AlignerResults(const std::string& param); - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* out_context) const; - private: -  int fid_; -  std::vector<boost::shared_ptr<Array2D<bool> > > is_aligned_; -  mutable int cur_sent_; -  const Array2D<bool> mutable* cur_grid_; -}; -  #include <tr1/unordered_map>  #include <boost/functional/hash.hpp>  #include <cassert> @@ -254,9 +237,9 @@ class IdentityCycleDetector : public FeatureFunction {    mutable std::map<WordID, bool> big_enough_;  }; -class InputIdentity : public FeatureFunction { +class InputIndicator : public FeatureFunction {   public: -  InputIdentity(const std::string& param); +  InputIndicator(const std::string& param);   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, @@ -270,4 +253,18 @@ class InputIdentity : public FeatureFunction {    mutable Class2FID fmap_;  }; +class Fertility : public FeatureFunction { + public: +  Fertility(const std::string& param); + protected: +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const Hypergraph::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* context) const; + private: +  mutable std::map<WordID, int> fids_; +}; +  #endif diff --git a/decoder/tagger.cc b/decoder/tagger.cc index 4dded35f..54890e85 100644 --- a/decoder/tagger.cc +++ b/decoder/tagger.cc @@ -96,7 +96,7 @@ bool Tagger::TranslateImpl(const string& input,                         SentenceMetadata* smeta,                         const vector<double>& weights,                         Hypergraph* forest) { -  Lattice lattice; +  Lattice& lattice = smeta->src_lattice_;    LatticeTools::ConvertTextToLattice(input, &lattice);    smeta->SetSourceLength(lattice.size());    vector<WordID> sequence(lattice.size()); diff --git a/tests/system_tests/controlled_synparse/gold.statistics b/tests/system_tests/controlled_synparse/gold.statistics index 0b8453f8..4cd93617 100644 --- a/tests/system_tests/controlled_synparse/gold.statistics +++ b/tests/system_tests/controlled_synparse/gold.statistics @@ -1,12 +1,6 @@  -lm_nodes 11  -lm_edges 18  -lm_paths 18 -constr_nodes 8 -constr_edges 8 -constr_paths 1  -lm_nodes 11  -lm_edges 18  -lm_paths 18 -constr_nodes 12 -constr_edges 14 -constr_paths 3 diff --git a/tests/system_tests/controlled_synparse/input.txt b/tests/system_tests/controlled_synparse/input.txt index 2dbc09c8..4b2b2589 100644 --- a/tests/system_tests/controlled_synparse/input.txt +++ b/tests/system_tests/controlled_synparse/input.txt @@ -1,2 +1,2 @@ -A B C D ||| a d c b -A B C D ||| a b c d +A B C D +A B C D diff --git a/tests/system_tests/tagger/cdec.ini b/tests/system_tests/tagger/cdec.ini index 15cc930d..e11e6878 100644 --- a/tests/system_tests/tagger/cdec.ini +++ b/tests/system_tests/tagger/cdec.ini @@ -1,5 +1,5 @@  formalism=tagger -feature_function=Tagger_BigramIdentity -feature_function=LexicalPairIdentity +feature_function=Tagger_BigramIndicator +feature_function=LexicalPairIndicator  intersection_strategy=full -tagger_tagset=tagset
\ No newline at end of file +tagger_tagset=tagset diff --git a/tests/system_tests/tagger/gold.statistics b/tests/system_tests/tagger/gold.statistics new file mode 100644 index 00000000..a86584aa --- /dev/null +++ b/tests/system_tests/tagger/gold.statistics @@ -0,0 +1,12 @@ +-lm_nodes 6 +-lm_edges 12 +-lm_paths 27 ++lm_nodes 16 ++lm_edges 30 ++lm_paths 27 +-lm_nodes 2 +-lm_edges 4 +-lm_paths 3 ++lm_nodes 4 ++lm_edges 6 ++lm_paths 3 diff --git a/tests/system_tests/tagger/gold.stdout b/tests/system_tests/tagger/gold.stdout new file mode 100644 index 00000000..0dc9a4ec --- /dev/null +++ b/tests/system_tests/tagger/gold.stdout @@ -0,0 +1,2 @@ +c c c +c diff --git a/tests/system_tests/tagger/input.txt b/tests/system_tests/tagger/input.txt index fe5cb58d..b6cfc776 100644 --- a/tests/system_tests/tagger/input.txt +++ b/tests/system_tests/tagger/input.txt @@ -1,2 +1,2 @@  A B C -A
\ No newline at end of file +A diff --git a/tests/system_tests/tagger/weights b/tests/system_tests/tagger/weights index 5a035c26..df9adc7b 100644 --- a/tests/system_tests/tagger/weights +++ b/tests/system_tests/tagger/weights @@ -1,27 +1,27 @@ -Uni:a 0 -Id:A:a 0 -Uni:b 0 -Id:A:b 0 -Uni:c 0 -Id:A:c 0 -Id:B:a 0 -Id:B:b 0 -Id:B:c 0 -Bi:BOS_a 0 -Bi:a_a 0 -Bi:BOS_b 0 -Bi:b_a 0 -Bi:BOS_c 0 -Bi:c_a 0 -Bi:a_b 0 -Bi:b_b 0 -Bi:c_b 0 -Bi:a_c 0 -Bi:b_c 0 -Bi:c_c 0 -Id:C:a 0 -Id:C:b 0 -Id:C:c 0 -Bi:a_EOS 0 -Bi:b_EOS 0 -Bi:c_EOS 0 +Uni:a 0.1 +Id:A:a 0.2 +Uni:b 0.321 +Id:A:b -0.2 +Uni:c 0.54 +Id:A:c 0.7 +Id:B:a 0.13 +Id:B:b 0.14 +Id:B:c 0.15 +Bi:BOS_a 0.16 +Bi:a_a 0.27 +Bi:BOS_b 0.18 +Bi:b_a 0.19 +Bi:BOS_c 0.22 +Bi:c_a 0.23 +Bi:a_b 0.24 +Bi:b_b 0.25 +Bi:c_b 0.26 +Bi:a_c 0.27 +Bi:b_c 0.28 +Bi:c_c 0.29 +Id:C:a 0.41 +Id:C:b 0.42 +Id:C:c 0.43 +Bi:a_EOS 0.44 +Bi:b_EOS 0.45 +Bi:c_EOS 0.98 diff --git a/tests/system_tests/unsup-align/cdec.ini b/tests/system_tests/unsup-align/cdec.ini index 885338a6..f591bcbe 100644 --- a/tests/system_tests/unsup-align/cdec.ini +++ b/tests/system_tests/unsup-align/cdec.ini @@ -3,4 +3,4 @@ grammar=unsup-align.lex-grammar  intersection_strategy=full  formalism=lextrans  feature_function=RelativeSentencePosition -feature_function=MarkovJump -b +feature_function=NewJump diff --git a/tests/system_tests/unsup-align/weights b/tests/system_tests/unsup-align/weights index 7d9012c5..535ee67b 100644 --- a/tests/system_tests/unsup-align/weights +++ b/tests/system_tests/unsup-align/weights @@ -1,4 +1,9 @@  RelativeSentencePosition -0.1 +J:R1 0.2 +J:R2 0.1 +J:S0 0 +J:L1 -0.1 +J:L2 -0.2  MarkovJump -0.2  F1000001 0.45280036748928199  F1000002 -0.30603801277140658 diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl index fc2e7fcb..3a385a88 100755 --- a/word-aligner/aligner.pl +++ b/word-aligner/aligner.pl @@ -119,16 +119,17 @@ grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz  # per_sentence_grammar_file=$align_dir/grammars/psg.$direction  feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz -feature_function=LexicalPairIdentity +feature_function=LexicalPairIndicator  # stem translation -feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map +feature_function=LexicalPairIndicator S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map  # POS translation -feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second -feature_function=InputIdentity -feature_function=OutputIdentity +feature_function=LexicalPairIndicator C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second +feature_function=InputIndicator +feature_function=OutputIndicator  feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first  feature_function=LexNullJump  feature_function=NewJump +feature_function=IdentityCycleDetector  feature_function=NewJump use_binned_log_lengths flen  # jump distance and src and destination class type  feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first | 
