diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | decoder/bottom_up_parser.cc | 19 | ||||
-rw-r--r-- | decoder/cdec_ff.cc | 10 | ||||
-rw-r--r-- | decoder/ff_csplit.cc | 2 | ||||
-rw-r--r-- | decoder/ff_tagger.cc | 22 | ||||
-rw-r--r-- | decoder/ff_tagger.h | 12 | ||||
-rw-r--r-- | decoder/ff_wordalign.cc | 98 | ||||
-rw-r--r-- | decoder/ff_wordalign.h | 35 | ||||
-rw-r--r-- | decoder/tagger.cc | 2 | ||||
-rw-r--r-- | tests/system_tests/controlled_synparse/gold.statistics | 6 | ||||
-rw-r--r-- | tests/system_tests/controlled_synparse/input.txt | 4 | ||||
-rw-r--r-- | tests/system_tests/tagger/cdec.ini | 6 | ||||
-rw-r--r-- | tests/system_tests/tagger/gold.statistics | 12 | ||||
-rw-r--r-- | tests/system_tests/tagger/gold.stdout | 2 | ||||
-rw-r--r-- | tests/system_tests/tagger/input.txt | 2 | ||||
-rw-r--r-- | tests/system_tests/tagger/weights | 54 | ||||
-rw-r--r-- | tests/system_tests/unsup-align/cdec.ini | 2 | ||||
-rw-r--r-- | tests/system_tests/unsup-align/weights | 5 | ||||
-rwxr-xr-x | word-aligner/aligner.pl | 11 |
19 files changed, 142 insertions, 163 deletions
@@ -1,3 +1,4 @@ +klm/lm/build_binary extools/extractor_monolingual gi/posterior-regularisation/prjava/lib/*.jar klm/lm/libklm.a diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 9504419c..aecf1cfa 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -14,20 +14,6 @@ using namespace std; -struct ParserStats { - ParserStats() : active_items(), passive_items() {} - void Reset() { active_items=0; passive_items=0; } - void Report() { - if (!SILENT) cerr << " ACTIVE ITEMS: " << active_items << "\tPASSIVE ITEMS: " << passive_items << endl; - } - int active_items; - int passive_items; - void NotifyActive(int , int ) { ++active_items; } - void NotifyPassive(int , int ) { ++passive_items; } -}; - -ParserStats stats; - class ActiveChart; class PassiveChart { public: @@ -90,7 +76,6 @@ class ActiveChart { void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const { const GrammarIter* ni = gptr_->Extend(symbol); if (ni) { - stats.NotifyActive(-1,-1); // TRACKING STATS out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost)); } } @@ -98,7 +83,6 @@ class ActiveChart { int symbol = hg->nodes_[node_index].cat_; const GrammarIter* ni = gptr_->Extend(symbol); if (!ni) return; - stats.NotifyActive(-1,-1); // TRACKING STATS Hypergraph::TailNodeVector na(ant_nodes_.size() + 1); for (int i = 0; i < ant_nodes_.size(); ++i) na[i] = ant_nodes_[i]; @@ -181,7 +165,6 @@ void PassiveChart::ApplyRule(const int i, const TRulePtr& r, const Hypergraph::TailNodeVector& ant_nodes, const float lattice_cost) { - stats.NotifyPassive(i,j); // TRACKING STATS Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes); new_edge->prev_i_ = r->prev_i; new_edge->prev_j_ = r->prev_j; @@ -299,9 +282,7 @@ ExhaustiveBottomUpParser::ExhaustiveBottomUpParser( bool ExhaustiveBottomUpParser::Parse(const Lattice& input, Hypergraph* forest) const { - stats.Reset(); PassiveChart chart(goal_sym_, grammars_, input, forest); const 
bool result = chart.Parse(); - stats.Report(); return result; } diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 729d1214..75591af8 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -53,15 +53,15 @@ void register_feature_functions() { ff_registry.Register("LexNullJump", new FFFactory<LexNullJump>); ff_registry.Register("NewJump", new FFFactory<NewJump>); ff_registry.Register("SourceBigram", new FFFactory<SourceBigram>); + ff_registry.Register("Fertility", new FFFactory<Fertility>); ff_registry.Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>); - ff_registry.Register("AlignerResults", new FFFactory<AlignerResults>); ff_registry.Register("CSplit_BasicFeatures", new FFFactory<BasicCSplitFeatures>); ff_registry.Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>); - ff_registry.Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>); - ff_registry.Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>); - ff_registry.Register("OutputIdentity", new FFFactory<OutputIdentity>); + ff_registry.Register("Tagger_BigramIndicator", new FFFactory<Tagger_BigramIndicator>); + ff_registry.Register("LexicalPairIndicator", new FFFactory<LexicalPairIndicator>); + ff_registry.Register("OutputIndicator", new FFFactory<OutputIndicator>); ff_registry.Register("IdentityCycleDetector", new FFFactory<IdentityCycleDetector>); - ff_registry.Register("InputIdentity", new FFFactory<InputIdentity>); + ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>); ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>); ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>); ff_registry.Register("WordSet", new FFFactory<WordSet>); diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index f267f8e8..1485009b 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -208,9 +208,9 @@ void 
ReverseCharLMCSplitFeature::TraversalFeaturesImpl( if (edge.rule_->EWords() != 1) return; const double lpp = pimpl_->LeftPhonotacticProb(smeta.GetSourceLattice(), edge.i_); features->set_value(fid_, lpp); -#if 0 WordID neighbor_word = 0; const WordID word = edge.rule_->e_[1]; +#if 0 if (chars > 4 && (sword[0] == 's' || sword[0] == 'n')) { neighbor_word = TD::Convert(string(&sword[1])); } diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index 21d0f812..46c85cf3 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -8,10 +8,10 @@ using namespace std; -Tagger_BigramIdentity::Tagger_BigramIdentity(const std::string& param) : +Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) : FeatureFunction(sizeof(WordID)) {} -void Tagger_BigramIdentity::FireFeature(const WordID& left, +void Tagger_BigramIndicator::FireFeature(const WordID& left, const WordID& right, SparseVector<double>* features) const { int& fid = fmap_[left][right]; @@ -30,7 +30,7 @@ void Tagger_BigramIdentity::FireFeature(const WordID& left, features->set_value(fid, 1.0); } -void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void Tagger_BigramIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, @@ -53,18 +53,18 @@ void Tagger_BigramIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, } } -void LexicalPairIdentity::PrepareForInput(const SentenceMetadata& smeta) { +void LexicalPairIndicator::PrepareForInput(const SentenceMetadata& smeta) { lexmap_->PrepareForInput(smeta); } -LexicalPairIdentity::LexicalPairIdentity(const std::string& param) { +LexicalPairIndicator::LexicalPairIndicator(const std::string& param) { name_ = "Id"; if (param.size()) { // name corpus.f emap.txt vector<string> params; SplitOnWhitespace(param, &params); if (params.size() != 3) { - cerr << "LexicalPairIdentity takes 3 parameters: <name>
<corpus.src.txt> <trgmap.txt>\n"; + cerr << "LexicalPairIndicator takes 3 parameters: <name> <corpus.src.txt> <trgmap.txt>\n"; cerr << " * may be used for corpus.src.txt or trgmap.txt to use surface forms\n"; cerr << " Received: " << param << endl; abort(); @@ -76,7 +76,7 @@ LexicalPairIdentity::LexicalPairIdentity(const std::string& param) { } } -void LexicalPairIdentity::FireFeature(WordID src, +void LexicalPairIndicator::FireFeature(WordID src, WordID trg, SparseVector<double>* features) const { int& fid = fmap_[src][trg]; @@ -88,7 +88,7 @@ void LexicalPairIdentity::FireFeature(WordID src, features->set_value(fid, 1.0); } -void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void LexicalPairIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, @@ -105,9 +105,9 @@ void LexicalPairIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, } } -OutputIdentity::OutputIdentity(const std::string& param) {} +OutputIndicator::OutputIndicator(const std::string& param) {} -void OutputIdentity::FireFeature(WordID trg, +void OutputIndicator::FireFeature(WordID trg, SparseVector<double>* features) const { int& fid = fmap_[trg]; if (!fid) { @@ -125,7 +125,7 @@ void OutputIdentity::FireFeature(WordID trg, features->set_value(fid, 1.0); } -void OutputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void OutputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index 6adee5ab..3066866a 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -13,9 +13,9 @@ typedef std::map<WordID, Class2FID> Class2Class2FID; // the sequence unfolds from left to right, which means it doesn't // have to split states based on left context. 
// fires unigram features as well -class Tagger_BigramIdentity : public FeatureFunction { +class Tagger_BigramIndicator : public FeatureFunction { public: - Tagger_BigramIdentity(const std::string& param); + Tagger_BigramIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, @@ -32,9 +32,9 @@ class Tagger_BigramIdentity : public FeatureFunction { // for each pair of symbols cooccuring in a lexicalized rule, fire // a feature (mostly used for tagging, but could be used for any model) -class LexicalPairIdentity : public FeatureFunction { +class LexicalPairIndicator : public FeatureFunction { public: - LexicalPairIdentity(const std::string& param); + LexicalPairIndicator(const std::string& param); virtual void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, @@ -53,9 +53,9 @@ class LexicalPairIdentity : public FeatureFunction { }; -class OutputIdentity : public FeatureFunction { +class OutputIndicator : public FeatureFunction { public: - OutputIdentity(const std::string& param); + OutputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index ef3310b4..cdb8662a 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -6,6 +6,7 @@ #include <sstream> #include <string> #include <cmath> +#include <bitset> #include <tr1/unordered_map> #include <boost/tuple/tuple.hpp> @@ -443,58 +444,6 @@ void LexicalTranslationTrigger::TraversalFeaturesImpl(const SentenceMetadata& sm } } -// state: src word used, number of trg words generated -AlignerResults::AlignerResults(const std::string& param) : - cur_sent_(-1), - cur_grid_(NULL) { - vector<string> argv; - int argc = SplitOnWhitespace(param, &argv); - if (argc != 2) { - cerr << "Required format: 
AlignerResults [FeatureName] [file.pharaoh]\n"; - exit(1); - } - cerr << " feature: " << argv[0] << "\talignments: " << argv[1] << endl; - fid_ = FD::Convert(argv[0]); - ReadFile rf(argv[1]); - istream& in = *rf.stream(); int lc = 0; - while(in) { - string line; - getline(in, line); - if (!in) break; - ++lc; - is_aligned_.push_back(AlignmentPharaoh::ReadPharaohAlignmentGrid(line)); - } - cerr << " Loaded " << lc << " refs\n"; -} - -void AlignerResults::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const vector<const void*>& /* ant_states */, - SparseVector<double>* features, - SparseVector<double>* /* estimated_features */, - void* /* state */) const { - if (edge.i_ == -1 || edge.prev_i_ == -1) - return; - - if (cur_sent_ != smeta.GetSentenceID()) { - assert(smeta.HasReference()); - cur_sent_ = smeta.GetSentenceID(); - assert(cur_sent_ < is_aligned_.size()); - cur_grid_ = is_aligned_[cur_sent_].get(); - } - - //cerr << edge.rule_->AsString() << endl; - - int j = edge.i_; // source side (f) - int i = edge.prev_i_; // target side (e) - if (j < cur_grid_->height() && i < cur_grid_->width() && (*cur_grid_)(i, j)) { -// if (edge.rule_->e_[0] == smeta.GetReference()[i][0].label) { - features->set_value(fid_, 1.0); -// cerr << edge.rule_->AsString() << " (" << i << "," << j << ")\n"; -// } - } -} - BlunsomSynchronousParseHack::BlunsomSynchronousParseHack(const string& param) : FeatureFunction((100 / 8) + 1), fid_(FD::Convert("NotRef")), cur_sent_(-1) { ReadFile rf(param); @@ -618,10 +567,10 @@ void IdentityCycleDetector::TraversalFeaturesImpl(const SentenceMetadata& smeta, } -InputIdentity::InputIdentity(const std::string& param) {} +InputIndicator::InputIndicator(const std::string& param) {} -void InputIdentity::FireFeature(WordID src, - SparseVector<double>* features) const { +void InputIndicator::FireFeature(WordID src, + SparseVector<double>* features) const { int& fid = fmap_[src]; if (!fid) { static map<WordID, WordID> 
escape; @@ -638,7 +587,7 @@ void InputIdentity::FireFeature(WordID src, features->set_value(fid, 1.0); } -void InputIdentity::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void InputIndicator::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, @@ -770,3 +719,40 @@ void WordPairFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, } } +struct PathFertility { + unsigned char null_fertility; + unsigned char index_fertility[255]; + PathFertility& operator+=(const PathFertility& rhs) { + null_fertility += rhs.null_fertility; + for (int i = 0; i < 255; ++i) + index_fertility[i] += rhs.index_fertility[i]; + return *this; + } +}; + +Fertility::Fertility(const string& config) : + FeatureFunction(sizeof(PathFertility)) {} + +void Fertility::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + PathFertility& out_fert = *static_cast<PathFertility*>(context); + if (edge.Arity() == 0) { + if (edge.i_ < 0) { + out_fert.null_fertility = 1; + } else { + out_fert.index_fertility[edge.i_] = 1; + } + } else if (edge.Arity() == 2) { + const PathFertility left = *static_cast<const PathFertility*>(ant_contexts[0]); + const PathFertility right = *static_cast<const PathFertility*>(ant_contexts[1]); + out_fert += left; + out_fert += right; + } else if (edge.Arity() == 1) { + out_fert += *static_cast<const PathFertility*>(ant_contexts[0]); + } +} + diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index 8035000e..d7a2dda8 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -124,23 +124,6 @@ class LexicalTranslationTrigger : public FeatureFunction { std::vector<std::vector<WordID> > triggers_; }; -class AlignerResults : public FeatureFunction { - public: - 
AlignerResults(const std::string& param); - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_contexts, - SparseVector<double>* features, - SparseVector<double>* estimated_features, - void* out_context) const; - private: - int fid_; - std::vector<boost::shared_ptr<Array2D<bool> > > is_aligned_; - mutable int cur_sent_; - const Array2D<bool> mutable* cur_grid_; -}; - #include <tr1/unordered_map> #include <boost/functional/hash.hpp> #include <cassert> @@ -254,9 +237,9 @@ class IdentityCycleDetector : public FeatureFunction { mutable std::map<WordID, bool> big_enough_; }; -class InputIdentity : public FeatureFunction { +class InputIndicator : public FeatureFunction { public: - InputIdentity(const std::string& param); + InputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, @@ -270,4 +253,18 @@ class InputIdentity : public FeatureFunction { mutable Class2FID fmap_; }; +class Fertility : public FeatureFunction { + public: + Fertility(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + mutable std::map<WordID, int> fids_; +}; + #endif diff --git a/decoder/tagger.cc b/decoder/tagger.cc index 4dded35f..54890e85 100644 --- a/decoder/tagger.cc +++ b/decoder/tagger.cc @@ -96,7 +96,7 @@ bool Tagger::TranslateImpl(const string& input, SentenceMetadata* smeta, const vector<double>& weights, Hypergraph* forest) { - Lattice lattice; + Lattice& lattice = smeta->src_lattice_; LatticeTools::ConvertTextToLattice(input, &lattice); smeta->SetSourceLength(lattice.size()); vector<WordID> sequence(lattice.size()); diff --git 
a/tests/system_tests/controlled_synparse/gold.statistics b/tests/system_tests/controlled_synparse/gold.statistics index 0b8453f8..4cd93617 100644 --- a/tests/system_tests/controlled_synparse/gold.statistics +++ b/tests/system_tests/controlled_synparse/gold.statistics @@ -1,12 +1,6 @@ -lm_nodes 11 -lm_edges 18 -lm_paths 18 -constr_nodes 8 -constr_edges 8 -constr_paths 1 -lm_nodes 11 -lm_edges 18 -lm_paths 18 -constr_nodes 12 -constr_edges 14 -constr_paths 3 diff --git a/tests/system_tests/controlled_synparse/input.txt b/tests/system_tests/controlled_synparse/input.txt index 2dbc09c8..4b2b2589 100644 --- a/tests/system_tests/controlled_synparse/input.txt +++ b/tests/system_tests/controlled_synparse/input.txt @@ -1,2 +1,2 @@ -A B C D ||| a d c b -A B C D ||| a b c d +A B C D +A B C D diff --git a/tests/system_tests/tagger/cdec.ini b/tests/system_tests/tagger/cdec.ini index 15cc930d..e11e6878 100644 --- a/tests/system_tests/tagger/cdec.ini +++ b/tests/system_tests/tagger/cdec.ini @@ -1,5 +1,5 @@ formalism=tagger -feature_function=Tagger_BigramIdentity -feature_function=LexicalPairIdentity +feature_function=Tagger_BigramIndicator +feature_function=LexicalPairIndicator intersection_strategy=full -tagger_tagset=tagset
\ No newline at end of file +tagger_tagset=tagset diff --git a/tests/system_tests/tagger/gold.statistics b/tests/system_tests/tagger/gold.statistics new file mode 100644 index 00000000..a86584aa --- /dev/null +++ b/tests/system_tests/tagger/gold.statistics @@ -0,0 +1,12 @@ +-lm_nodes 6 +-lm_edges 12 +-lm_paths 27 ++lm_nodes 16 ++lm_edges 30 ++lm_paths 27 +-lm_nodes 2 +-lm_edges 4 +-lm_paths 3 ++lm_nodes 4 ++lm_edges 6 ++lm_paths 3 diff --git a/tests/system_tests/tagger/gold.stdout b/tests/system_tests/tagger/gold.stdout new file mode 100644 index 00000000..0dc9a4ec --- /dev/null +++ b/tests/system_tests/tagger/gold.stdout @@ -0,0 +1,2 @@ +c c c +c diff --git a/tests/system_tests/tagger/input.txt b/tests/system_tests/tagger/input.txt index fe5cb58d..b6cfc776 100644 --- a/tests/system_tests/tagger/input.txt +++ b/tests/system_tests/tagger/input.txt @@ -1,2 +1,2 @@ A B C -A
\ No newline at end of file +A diff --git a/tests/system_tests/tagger/weights b/tests/system_tests/tagger/weights index 5a035c26..df9adc7b 100644 --- a/tests/system_tests/tagger/weights +++ b/tests/system_tests/tagger/weights @@ -1,27 +1,27 @@ -Uni:a 0 -Id:A:a 0 -Uni:b 0 -Id:A:b 0 -Uni:c 0 -Id:A:c 0 -Id:B:a 0 -Id:B:b 0 -Id:B:c 0 -Bi:BOS_a 0 -Bi:a_a 0 -Bi:BOS_b 0 -Bi:b_a 0 -Bi:BOS_c 0 -Bi:c_a 0 -Bi:a_b 0 -Bi:b_b 0 -Bi:c_b 0 -Bi:a_c 0 -Bi:b_c 0 -Bi:c_c 0 -Id:C:a 0 -Id:C:b 0 -Id:C:c 0 -Bi:a_EOS 0 -Bi:b_EOS 0 -Bi:c_EOS 0 +Uni:a 0.1 +Id:A:a 0.2 +Uni:b 0.321 +Id:A:b -0.2 +Uni:c 0.54 +Id:A:c 0.7 +Id:B:a 0.13 +Id:B:b 0.14 +Id:B:c 0.15 +Bi:BOS_a 0.16 +Bi:a_a 0.27 +Bi:BOS_b 0.18 +Bi:b_a 0.19 +Bi:BOS_c 0.22 +Bi:c_a 0.23 +Bi:a_b 0.24 +Bi:b_b 0.25 +Bi:c_b 0.26 +Bi:a_c 0.27 +Bi:b_c 0.28 +Bi:c_c 0.29 +Id:C:a 0.41 +Id:C:b 0.42 +Id:C:c 0.43 +Bi:a_EOS 0.44 +Bi:b_EOS 0.45 +Bi:c_EOS 0.98 diff --git a/tests/system_tests/unsup-align/cdec.ini b/tests/system_tests/unsup-align/cdec.ini index 885338a6..f591bcbe 100644 --- a/tests/system_tests/unsup-align/cdec.ini +++ b/tests/system_tests/unsup-align/cdec.ini @@ -3,4 +3,4 @@ grammar=unsup-align.lex-grammar intersection_strategy=full formalism=lextrans feature_function=RelativeSentencePosition -feature_function=MarkovJump -b +feature_function=NewJump diff --git a/tests/system_tests/unsup-align/weights b/tests/system_tests/unsup-align/weights index 7d9012c5..535ee67b 100644 --- a/tests/system_tests/unsup-align/weights +++ b/tests/system_tests/unsup-align/weights @@ -1,4 +1,9 @@ RelativeSentencePosition -0.1 +J:R1 0.2 +J:R2 0.1 +J:S0 0 +J:L1 -0.1 +J:L2 -0.2 MarkovJump -0.2 F1000001 0.45280036748928199 F1000002 -0.30603801277140658 diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl index fc2e7fcb..3a385a88 100755 --- a/word-aligner/aligner.pl +++ b/word-aligner/aligner.pl @@ -119,16 +119,17 @@ grammar=$align_dir/grammars/corpus.$direction.lex-grammar.gz # per_sentence_grammar_file=$align_dir/grammars/psg.$direction 
feature_function=WordPairFeatures $align_dir/grammars/wordpairs.$direction.features.gz -feature_function=LexicalPairIdentity +feature_function=LexicalPairIndicator # stem translation -feature_function=LexicalPairIdentity S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map +feature_function=LexicalPairIndicator S $align_dir/grammars/corpus.stemmed.$first $align_dir/grammars/${second}stem.map # POS translation -feature_function=LexicalPairIdentity C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second -feature_function=InputIdentity -feature_function=OutputIdentity +feature_function=LexicalPairIndicator C $align_dir/grammars/corpus.class.$first $align_dir/grammars/voc2class.$second +feature_function=InputIndicator +feature_function=OutputIndicator feature_function=RelativeSentencePosition $align_dir/grammars/corpus.class.$first feature_function=LexNullJump feature_function=NewJump +feature_function=IdentityCycleDetector feature_function=NewJump use_binned_log_lengths flen # jump distance and src and destination class type feature_function=NewJump use_binned_log_lengths f0 fprev f:$align_dir/grammars/corpus.class.$first |