diff options
-rw-r--r-- | LICENSE.cctbx.txt | 45 | ||||
-rw-r--r-- | LICENSE.txt | 213 | ||||
-rw-r--r-- | README | 19 | ||||
-rw-r--r-- | src/array2d.h | 1 | ||||
-rw-r--r-- | src/bottom_up_parser.cc | 53 | ||||
-rw-r--r-- | src/cdec_ff.cc | 1 | ||||
-rw-r--r-- | src/ff.cc | 21 | ||||
-rw-r--r-- | src/ff.h | 15 | ||||
-rw-r--r-- | src/grammar.cc | 11 | ||||
-rw-r--r-- | src/grammar.h | 8 | ||||
-rw-r--r-- | src/lattice.cc | 34 | ||||
-rw-r--r-- | src/lattice.h | 24 | ||||
-rw-r--r-- | src/synparse.cc | 212 | ||||
-rw-r--r-- | tests/system_tests/controlled_synparse/gold.stdout | 72 | ||||
-rw-r--r-- | tests/system_tests/ftrans/gold.stdout | 8 | ||||
-rw-r--r-- | tests/system_tests/lattice/cdec.ini | 5 | ||||
-rw-r--r-- | tests/system_tests/lattice/gold.statistics | 9 | ||||
-rw-r--r-- | tests/system_tests/lattice/gold.stdout | 5 | ||||
-rw-r--r-- | tests/system_tests/lattice/input.txt | 1 | ||||
-rw-r--r-- | tests/system_tests/lattice/lattice.scfg | 6 | ||||
-rw-r--r-- | tests/system_tests/lattice/weights | 4 |
21 files changed, 466 insertions, 301 deletions
diff --git a/LICENSE.cctbx.txt b/LICENSE.cctbx.txt new file mode 100644 index 00000000..a8d9a494 --- /dev/null +++ b/LICENSE.cctbx.txt @@ -0,0 +1,45 @@ +*** License agreement *** + +cctbx Copyright (c) 2006, The Regents of the University of +California, through Lawrence Berkeley National Laboratory (subject to +receipt of any required approvals from the U.S. Dept. of Energy). All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +(1) Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +(2) Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +(3) Neither the name of the University of California, Lawrence Berkeley +National Laboratory, U.S. Dept. of Energy nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +You are under no obligation whatsoever to provide any bug fixes, +patches, or upgrades to the features, functionality or performance of +the source code ("Enhancements") to anyone; however, if you choose to +make your Enhancements available either publicly, or directly to +Lawrence Berkeley National Laboratory, without imposing a separate +written license agreement for such Enhancements, then you hereby grant +the following license: a non-exclusive, royalty-free perpetual license +to install, use, modify, prepare derivative works, incorporate into +other computer software, distribute, and sublicense such enhancements or +derivative works thereof, in binary and source code form. + diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..a390938b --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,213 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------------------------------------------- + +L-BFGS CODE FROM COMPUTATIONAL CRYSTALLOGRAPHY TOOLBOX (CCTBX) + +This package includes source code (training/lbfgs.h) based on source +code distributed as part of the Computational Crystallography Toolbox +(CCTBX), which has separate copyright notices and license terms. Use of +this source code is subject to the terms and conditions of the license +contained in the file LICENSE.cctbx.txt. + @@ -50,19 +50,6 @@ COPYRIGHT AND LICENSE ------------------------------------------------------------------------------ Copyright (c) 2009 by Chris Dyer <redpony@gmail.com> -Licensed under the Apache License, Version 2.0 (the "License"); you may -not use this file except in compliance with the License. You may obtain -a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -The LBFGS implementation contains code from the Computational -Crystallography Toolbox which is copyright (c) 2006 by The Regents of the -University of California, through Lawrence Berkeley National Laboratory. 
-For more information on their license, refer to http://cctbx.sourceforge.net/ +See the file LICENSE.txt for the licensing terms that this software is +released under. + diff --git a/src/array2d.h b/src/array2d.h index 09d84d0b..e63eda0d 100644 --- a/src/array2d.h +++ b/src/array2d.h @@ -19,6 +19,7 @@ class Array2D { width_(w), height_(h), data_(w*h, d) {} Array2D(const Array2D& rhs) : width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {} + bool empty() const { return data_.empty(); } void resize(int w, int h, const T& d = T()) { data_.resize(w * h, d); width_ = w; diff --git a/src/bottom_up_parser.cc b/src/bottom_up_parser.cc index 349ed2de..b3315b8a 100644 --- a/src/bottom_up_parser.cc +++ b/src/bottom_up_parser.cc @@ -24,8 +24,18 @@ class PassiveChart { inline int GetGoalIndex() const { return goal_idx_; } private: - void ApplyRules(const int i, const int j, const RuleBin* rules, const Hypergraph::TailNodeVector& tail); - void ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes); + void ApplyRules(const int i, + const int j, + const RuleBin* rules, + const Hypergraph::TailNodeVector& tail, + const float lattice_cost); + + void ApplyRule(const int i, + const int j, + const TRulePtr& r, + const Hypergraph::TailNodeVector& ant_nodes, + const float lattice_cost); + void ApplyUnaryRules(const int i, const int j); const vector<GrammarPtr>& grammars_; @@ -38,6 +48,7 @@ class PassiveChart { const WordID goal_cat_; // category that is being searched for at [0,n] TRulePtr goal_rule_; int goal_idx_; // index of goal node, if found + const int lc_fid_; static WordID kGOAL; // [Goal] }; @@ -51,12 +62,12 @@ class ActiveChart { act_chart_(psv_chart.size(), psv_chart.size()), psv_chart_(psv_chart) {} struct ActiveItem { - ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, double lcost) : + ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, float lcost) : gptr_(g), ant_nodes_(a), 
lattice_cost(lcost) {} explicit ActiveItem(const GrammarIter* g) : - gptr_(g), ant_nodes_(), lattice_cost() {} + gptr_(g), ant_nodes_(), lattice_cost(0.0) {} - void ExtendTerminal(int symbol, double src_cost, vector<ActiveItem>* out_cell) const { + void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const { const GrammarIter* ni = gptr_->Extend(symbol); if (ni) out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost)); } @@ -73,14 +84,14 @@ class ActiveChart { const GrammarIter* gptr_; Hypergraph::TailNodeVector ant_nodes_; - double lattice_cost; // TODO? use SparseVector<double> + float lattice_cost; // TODO? use SparseVector<double> }; inline const vector<ActiveItem>& operator()(int i, int j) const { return act_chart_(i,j); } void SeedActiveChart(const Grammar& g) { int size = act_chart_.width(); for (int i = 0; i < size; ++i) - if (g.HasRuleForSpan(i,i)) + if (g.HasRuleForSpan(i,i,0)) act_chart_(i,i).push_back(ActiveItem(g.GetRoot())); } @@ -132,7 +143,8 @@ PassiveChart::PassiveChart(const string& goal, nodemap_(input.size()+1, input.size()+1), goal_cat_(TD::Convert(goal) * -1), goal_rule_(new TRule("[Goal] ||| [" + goal + ",1] ||| [" + goal + ",1]")), - goal_idx_(-1) { + goal_idx_(-1), + lc_fid_(FD::Convert("LatticeCost")) { act_chart_.resize(grammars_.size()); for (int i = 0; i < grammars_.size(); ++i) act_chart_[i] = new ActiveChart(forest, *this); @@ -140,13 +152,19 @@ PassiveChart::PassiveChart(const string& goal, cerr << " Goal category: [" << goal << ']' << endl; } -void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes) { +void PassiveChart::ApplyRule(const int i, + const int j, + const TRulePtr& r, + const Hypergraph::TailNodeVector& ant_nodes, + const float lattice_cost) { Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes); new_edge->prev_i_ = r->prev_i; new_edge->prev_j_ = r->prev_j; new_edge->i_ = i; new_edge->j_ = j; new_edge->feature_values_ = 
r->GetFeatureValues(); + if (lattice_cost) + new_edge->feature_values_.set_value(lc_fid_, lattice_cost); Cat2NodeMap& c2n = nodemap_(i,j); const bool is_goal = (r->GetLHS() == kGOAL); const Cat2NodeMap::iterator ni = c2n.find(r->GetLHS()); @@ -169,23 +187,24 @@ void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergr void PassiveChart::ApplyRules(const int i, const int j, const RuleBin* rules, - const Hypergraph::TailNodeVector& tail) { + const Hypergraph::TailNodeVector& tail, + const float lattice_cost) { const int n = rules->GetNumRules(); for (int k = 0; k < n; ++k) - ApplyRule(i, j, rules->GetIthRule(k), tail); + ApplyRule(i, j, rules->GetIthRule(k), tail, lattice_cost); } void PassiveChart::ApplyUnaryRules(const int i, const int j) { const vector<int>& nodes = chart_(i,j); // reference is important! for (int gi = 0; gi < grammars_.size(); ++gi) { - if (!grammars_[gi]->HasRuleForSpan(i,j)) continue; + if (!grammars_[gi]->HasRuleForSpan(i,j,input_.Distance(i,j))) continue; for (int di = 0; di < nodes.size(); ++di) { const WordID& cat = forest_->nodes_[nodes[di]].cat_; const vector<TRulePtr>& unaries = grammars_[gi]->GetUnaryRulesForRHS(cat); for (int ri = 0; ri < unaries.size(); ++ri) { // cerr << "At (" << i << "," << j << "): applying " << unaries[ri]->AsString() << endl; const Hypergraph::TailNodeVector ant(1, nodes[di]); - ApplyRule(i, j, unaries[ri], ant); // may update nodes + ApplyRule(i, j, unaries[ri], ant, 0); // may update nodes } } } @@ -205,7 +224,7 @@ bool PassiveChart::Parse() { int j = i + l; for (int gi = 0; gi < grammars_.size(); ++gi) { const Grammar& g = *grammars_[gi]; - if (g.HasRuleForSpan(i, j)) { + if (g.HasRuleForSpan(i, j, input_.Distance(i, j))) { act_chart_[gi]->AdvanceDotsForAllItemsInCell(i, j, input_); const vector<ActiveChart::ActiveItem>& cell = (*act_chart_[gi])(i,j); @@ -213,7 +232,7 @@ bool PassiveChart::Parse() { ai != cell.end(); ++ai) { const RuleBin* rules = (ai->gptr_->GetRules()); if (!rules) 
continue; - ApplyRules(i, j, rules, ai->ant_nodes_); + ApplyRules(i, j, rules, ai->ant_nodes_, ai->lattice_cost); } } } @@ -222,7 +241,7 @@ bool PassiveChart::Parse() { for (int gi = 0; gi < grammars_.size(); ++gi) { const Grammar& g = *grammars_[gi]; // deal with non-terminals that were just proved - if (g.HasRuleForSpan(i, j)) + if (g.HasRuleForSpan(i, j, input_.Distance(i,j))) act_chart_[gi]->ExtendActiveItems(i, i, j); } } @@ -231,7 +250,7 @@ bool PassiveChart::Parse() { const Hypergraph::Node& node = forest_->nodes_[dh[di]]; if (node.cat_ == goal_cat_) { Hypergraph::TailNodeVector ant(1, node.id_); - ApplyRule(0, input_.size(), goal_rule_, ant); + ApplyRule(0, input_.size(), goal_rule_, ant, 0); } } } diff --git a/src/cdec_ff.cc b/src/cdec_ff.cc index 846a908e..0a4f3d5e 100644 --- a/src/cdec_ff.cc +++ b/src/cdec_ff.cc @@ -11,6 +11,7 @@ boost::shared_ptr<FFRegistry> global_ff_registry; void register_feature_functions() { global_ff_registry->Register("LanguageModel", new FFFactory<LanguageModel>); global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>); + global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>); global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>); global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>); global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>); @@ -36,6 +36,27 @@ void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->set_value(fid_, edge.rule_->EWords() * value_); } +SourceWordPenalty::SourceWordPenalty(const string& param) : + fid_(FD::Convert("SourceWordPenalty")), + value_(-1.0 / log(10)) { + if (!param.empty()) { + cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; + } +} + +void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_states, + 
SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + features->set_value(fid_, edge.rule_->FWords() * value_); +} + ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) : models_(models), weights_(w), @@ -89,6 +89,21 @@ class WordPenalty : public FeatureFunction { const double value_; }; +class SourceWordPenalty : public FeatureFunction { + public: + SourceWordPenalty(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + const int fid_; + const double value_; +}; + // this class is a set of FeatureFunctions that can be used to score, rescore, // etc. a (translation?) forest class ModelSet { diff --git a/src/grammar.cc b/src/grammar.cc index e0b2a09e..e19bd344 100644 --- a/src/grammar.cc +++ b/src/grammar.cc @@ -15,9 +15,10 @@ RuleBin::~RuleBin() {} GrammarIter::~GrammarIter() {} Grammar::~Grammar() {} -bool Grammar::HasRuleForSpan(int i, int j) const { +bool Grammar::HasRuleForSpan(int i, int j, int distance) const { (void) i; (void) j; + (void) distance; return true; // always true by default } @@ -119,8 +120,8 @@ void TextGrammar::ReadFromFile(const string& filename) { cerr << " " << rule_count << " rules read.\n"; } -bool TextGrammar::HasRuleForSpan(int i, int j) const { - return (max_span_ >= (j - i)); +bool TextGrammar::HasRuleForSpan(int i, int j, int distance) const { + return (max_span_ >= distance); } GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} @@ -136,7 +137,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) { //cerr << "GLUE: " << glue->AsString() << endl; } -bool GlueGrammar::HasRuleForSpan(int 
i, int j) const { +bool GlueGrammar::HasRuleForSpan(int i, int j, int distance) const { (void) j; return (i == 0); } @@ -156,7 +157,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat) } } -bool PassThroughGrammar::HasRuleForSpan(int i, int j) const { +bool PassThroughGrammar::HasRuleForSpan(int i, int j, int distance) const { const set<int>& hr = has_rule_[i]; if (i == j) { return !hr.empty(); } return (hr.find(j) != hr.end()); diff --git a/src/grammar.h b/src/grammar.h index 4a03c505..3471e3f1 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -28,7 +28,7 @@ struct Grammar { virtual ~Grammar(); virtual const GrammarIter* GetRoot() const = 0; - virtual bool HasRuleForSpan(int i, int j) const; + virtual bool HasRuleForSpan(int i, int j, int distance) const; // cat is the category to be rewritten inline const std::vector<TRulePtr>& GetAllUnaryRules() const { @@ -59,7 +59,7 @@ struct TextGrammar : public Grammar { virtual const GrammarIter* GetRoot() const; void AddRule(const TRulePtr& rule); void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j) const; + virtual bool HasRuleForSpan(int i, int j, int distance) const; const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; private: int max_span_; @@ -70,12 +70,12 @@ struct GlueGrammar : public TextGrammar { // read glue grammar from file explicit GlueGrammar(const std::string& file); GlueGrammar(const std::string& goal_nt, const std::string& default_nt); // "S", "X" - virtual bool HasRuleForSpan(int i, int j) const; + virtual bool HasRuleForSpan(int i, int j, int distance) const; }; struct PassThroughGrammar : public TextGrammar { PassThroughGrammar(const Lattice& input, const std::string& cat); - virtual bool HasRuleForSpan(int i, int j) const; + virtual bool HasRuleForSpan(int i, int j, int distance) const; private: std::vector<std::set<int> > has_rule_; // index by [i][j] }; diff --git a/src/lattice.cc b/src/lattice.cc index 
aa1df3db..56bc9551 100644 --- a/src/lattice.cc +++ b/src/lattice.cc @@ -5,6 +5,39 @@ using namespace std; +static const int kUNREACHABLE = 99999999; + +void Lattice::ComputeDistances() { + const int n = this->size() + 1; + dist_.resize(n, n, kUNREACHABLE); + for (int i = 0; i < this->size(); ++i) { + const vector<LatticeArc>& alts = (*this)[i]; + for (int j = 0; j < alts.size(); ++j) + dist_(i, i + alts[j].dist2next) = 1; + } + for (int k = 0; k < n; ++k) { + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + const int dp = dist_(i,k) + dist_(k,j); + if (dist_(i,j) > dp) + dist_(i,j) = dp; + } + } + } + + for (int i = 0; i < n; ++i) { + int latest = kUNREACHABLE; + for (int j = n-1; j >= 0; --j) { + const int c = dist_(i,j); + if (c < kUNREACHABLE) + latest = c; + else + dist_(i,j) = latest; + } + } + // cerr << dist_ << endl; +} + bool LatticeTools::LooksLikePLF(const string &line) { return (line.size() > 5) && (line.substr(0,4) == "((('"); } @@ -23,5 +56,6 @@ void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) { HypergraphIO::PLFtoLattice(text_or_plf, pl); else ConvertTextToLattice(text_or_plf, pl); + pl->ComputeDistances(); } diff --git a/src/lattice.h b/src/lattice.h index 1177e768..71589b92 100644 --- a/src/lattice.h +++ b/src/lattice.h @@ -4,6 +4,14 @@ #include <string> #include <vector> #include "wordid.h" +#include "array2d.h" + +class Lattice; +struct LatticeTools { + static bool LooksLikePLF(const std::string &line); + static void ConvertTextToLattice(const std::string& text, Lattice* pl); + static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl); +}; struct LatticeArc { WordID label; @@ -14,18 +22,20 @@ struct LatticeArc { }; class Lattice : public std::vector<std::vector<LatticeArc> > { + friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl); public: Lattice() {} explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) : 
std::vector<std::vector<LatticeArc> >(t, v) {} - - // TODO add distance functions -}; + int Distance(int from, int to) const { + if (dist_.empty()) + return (to - from); + return dist_(from, to); + } -struct LatticeTools { - static bool LooksLikePLF(const std::string &line); - static void ConvertTextToLattice(const std::string& text, Lattice* pl); - static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl); + private: + void ComputeDistances(); + Array2D<int> dist_; }; #endif diff --git a/src/synparse.cc b/src/synparse.cc deleted file mode 100644 index 96588f1e..00000000 --- a/src/synparse.cc +++ /dev/null @@ -1,212 +0,0 @@ -#include <iostream> -#include <ext/hash_map> -#include <ext/hash_set> -#include <utility> - -#include <boost/multi_array.hpp> -#include <boost/functional/hash.hpp> -#include <boost/program_options.hpp> -#include <boost/program_options/variables_map.hpp> - -#include "prob.h" -#include "tdict.h" -#include "filelib.h" - -using namespace std; -using namespace __gnu_cxx; -namespace po = boost::program_options; - -const prob_t kMONO(1.0); // 0.6 -const prob_t kINV(1.0); // 0.1 -const prob_t kLEX(1.0); // 0.3 - -typedef hash_map<vector<WordID>, hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >, boost::hash<vector<WordID> > > PTable; -typedef boost::multi_array<prob_t, 4> CChart; -typedef pair<int,int> SpanType; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("phrasetable,p",po::value<string>(), "[REQD] Phrase pairs for ITG alignment") - ("input,i",po::value<string>()->default_value("-"), "Input file") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("phrasetable")) { - cerr << "Please specify a grammar file with -p <GRAMMAR.TXT>\n"; - flag = true; - } - if (flag 
|| conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void LoadITGPhrasetable(const string& fname, PTable* ptable) { - const WordID sep = TD::Convert("|||"); - ReadFile rf(fname); - istream& in = *rf.stream(); - assert(in); - int lc = 0; - while(in) { - string line; - getline(in, line); - if (line.empty()) continue; - ++lc; - vector<WordID> full, f, e; - TD::ConvertSentence(line, &full); - int i = 0; - for (; i < full.size(); ++i) { - if (full[i] == sep) break; - f.push_back(full[i]); - } - ++i; - for (; i < full.size(); ++i) { - if (full[i] == sep) break; - e.push_back(full[i]); - } - ++i; - prob_t prob(0.000001); - if (i < full.size()) { prob = prob_t(atof(TD::Convert(full[i]))); ++i; } - - if (i < full.size()) { cerr << "Warning line " << lc << " has extra stuff.\n"; } - assert(f.size() > 0); - assert(e.size() > 0); - (*ptable)[f][e] = prob; - } - cerr << "Read " << lc << " phrase pairs\n"; -} - -void FindPhrases(const vector<WordID>& e, const vector<WordID>& f, const PTable& pt, CChart* pcc) { - CChart& cc = *pcc; - const size_t n = f.size(); - const size_t m = e.size(); - typedef hash_map<vector<WordID>, vector<SpanType>, boost::hash<vector<WordID> > > PhraseToSpan; - PhraseToSpan e_locations; - for (int i = 0; i < m; ++i) { - const int mel = m - i; - vector<WordID> e_phrase; - for (int el = 0; el < mel; ++el) { - e_phrase.push_back(e[i + el]); - e_locations[e_phrase].push_back(make_pair(i, i + el + 1)); - } - } - //cerr << "Cached the locations of " << e_locations.size() << " e-phrases\n"; - - for (int s = 0; s < n; ++s) { - const int mfl = n - s; - vector<WordID> f_phrase; - for (int fl = 0; fl < mfl; ++fl) { - f_phrase.push_back(f[s + fl]); - PTable::const_iterator it = pt.find(f_phrase); - if (it == pt.end()) continue; - const hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >& es = it->second; - for (hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >::const_iterator eit = es.begin(); eit != es.end(); 
++eit) { - PhraseToSpan::iterator loc = e_locations.find(eit->first); - if (loc == e_locations.end()) continue; - const vector<SpanType>& espans = loc->second; - for (int j = 0; j < espans.size(); ++j) { - cc[s][s + fl + 1][espans[j].first][espans[j].second] = eit->second; - //cerr << '[' << s << ',' << (s + fl + 1) << ',' << espans[j].first << ',' << espans[j].second << "] is C\n"; - } - } - } - } -} - -long long int evals = 0; - -void ProcessSynchronousCell(const int s, - const int t, - const int u, - const int v, - const prob_t& lex, - const prob_t& mono, - const prob_t& inv, - const CChart& tc, CChart* ntc) { - prob_t& inside = (*ntc)[s][t][u][v]; - // cerr << log(tc[s][t][u][v]) << " + " << log(lex) << endl; - inside = tc[s][t][u][v] * lex; - // cerr << " terminal span: " << log(inside) << endl; - if (t - s == 1) return; - if (v - u == 1) return; - for (int x = s+1; x < t; ++x) { - for (int y = u+1; y < v; ++y) { - const prob_t m = (*ntc)[s][x][u][y] * (*ntc)[x][t][y][v] * mono; - const prob_t i = (*ntc)[s][x][y][v] * (*ntc)[x][t][u][y] * inv; - // cerr << log(i) << "\t" << log(m) << endl; - inside += m; - inside += i; - evals++; - } - } - // cerr << " span: " << log(inside) << endl; -} - -prob_t SynchronousParse(const int n, const int m, const prob_t& lex, const prob_t& mono, const prob_t& inv, const CChart& tc, CChart* ntc) { - for (int fl = 0; fl < n; ++fl) { - for (int el = 0; el < m; ++el) { - const int ms = n - fl; - for (int s = 0; s < ms; ++s) { - const int t = s + fl + 1; - const int mu = m - el; - for (int u = 0; u < mu; ++u) { - const int v = u + el + 1; - //cerr << "Processing cell [" << s << ',' << t << ',' << u << ',' << v << "]\n"; - ProcessSynchronousCell(s, t, u, v, lex, mono, inv, tc, ntc); - } - } - } - } - return (*ntc)[0][n][0][m]; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - PTable ptable; - LoadITGPhrasetable(conf["phrasetable"].as<string>(), &ptable); - ReadFile 
rf(conf["input"].as<string>()); - istream& in = *rf.stream(); - int lc = 0; - const WordID sep = TD::Convert("|||"); - while(in) { - string line; - getline(in, line); - if (line.empty()) continue; - ++lc; - vector<WordID> full, f, e; - TD::ConvertSentence(line, &full); - int i = 0; - for (; i < full.size(); ++i) { - if (full[i] == sep) break; - f.push_back(full[i]); - } - ++i; - for (; i < full.size(); ++i) { - if (full[i] == sep) break; - e.push_back(full[i]); - } - if (e.empty()) cerr << "E is empty!\n"; - if (f.empty()) cerr << "F is empty!\n"; - if (e.empty() || f.empty()) continue; - int n = f.size(); - int m = e.size(); - cerr << "Synchronous chart has " << (n * n * m * m) << " cells\n"; - clock_t start = clock(); - CChart cc(boost::extents[n+1][n+1][m+1][m+1]); - FindPhrases(e, f, ptable, &cc); - CChart ntc(boost::extents[n+1][n+1][m+1][m+1]); - prob_t likelihood = SynchronousParse(n, m, kLEX, kMONO, kINV, cc, &ntc); - clock_t end = clock(); - cerr << "log Z: " << log(likelihood) << endl; - cerr << " Z: " << likelihood << endl; - double etime = (end - start) / 1000000.0; - cout << " time: " << etime << endl; - cout << "evals: " << evals << endl; - } - return 0; -} - diff --git a/tests/system_tests/controlled_synparse/gold.stdout b/tests/system_tests/controlled_synparse/gold.stdout index f3e0ce08..9e68069e 100644 --- a/tests/system_tests/controlled_synparse/gold.stdout +++ b/tests/system_tests/controlled_synparse/gold.stdout @@ -1,36 +1,36 @@ -0 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 -0 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 -0 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 -0 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 -0 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 -0 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 -0 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 -0 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 -0 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 -0 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 -0 ||| b d c b ||| 
F1bad1=1;F2=1;F5=1;F7=1;Glue=1 -0 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 -0 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 -0 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2 -0 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 -0 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 -0 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 -0 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 -1 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 -1 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 -1 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 -1 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 -1 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 -1 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 -1 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 -1 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 -1 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 -1 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 -1 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1 -1 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 -1 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 -1 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2 -1 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 -1 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 -1 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 -1 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 +0 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +0 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +0 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +0 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +0 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +0 ||| c c b d ||| 
F1bad2=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 +0 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +0 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +0 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 +0 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 +1 ||| c b c d ||| F1bad2=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +1 ||| c d c b e ||| F1bad2=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| c b c d ||| F1bad2=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +1 ||| b d c b e ||| F1bad1=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| c b c d ||| F1bad2=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| a d c b e ||| F1=1;F4=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| b b c d ||| F1bad1=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| c d c b ||| F1bad2=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| a b c d ||| F1=1;F3=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| b b c d ||| F1bad1=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +1 ||| b d c b ||| F1bad1=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| a d c b ||| F1=1;F2=1;F5=1;F7=1;Glue=1 ||| 0 +1 ||| b b c d ||| F1bad1=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +1 ||| c c b d ||| F1bad2=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 +1 ||| a b c d ||| F1=1;F6=1;F7=1;F9=1;Glue=2 ||| 0 +1 ||| a b c d ||| F1=1;F5=1;F6=1;F7=1;Glue=3 ||| 0 +1 ||| b c b d ||| F1bad1=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 +1 ||| a c b d ||| F1=1;F6=1;F7=1;F8=1;Glue=2 ||| 0 diff --git a/tests/system_tests/ftrans/gold.stdout b/tests/system_tests/ftrans/gold.stdout index 25c615d1..d4e48533 100644 --- a/tests/system_tests/ftrans/gold.stdout +++ b/tests/system_tests/ftrans/gold.stdout @@ -1,4 +1,4 @@ -0 ||| CB ||| F4=1;Inv=1 -0 ||| BC ||| F3=0.4;Mono=1 -0 ||| B C ||| F1=1;OtherFeat=1;F2=0.2;Mono=1 -0 ||| C B ||| F1=1;OtherFeat=1;F2=0.2;Inv=1 +0 ||| CB ||| F4=1;Inv=1 ||| 101 +0 ||| BC ||| F3=0.4;Mono=1 ||| 6 +0 ||| B C ||| F1=1;OtherFeat=1;F2=0.2;Mono=1 ||| 3 +0 ||| C B ||| F1=1;OtherFeat=1;F2=0.2;Inv=1 ||| 2 diff --git a/tests/system_tests/lattice/cdec.ini b/tests/system_tests/lattice/cdec.ini new file mode 100644 index 00000000..e0da7daf --- /dev/null +++ b/tests/system_tests/lattice/cdec.ini @@ -0,0 +1,5 @@ 
+formalism=scfg +feature_function=SourceWordPenalty +feature_function=WordPenalty +k_best=1000 +grammar=lattice.scfg diff --git a/tests/system_tests/lattice/gold.statistics b/tests/system_tests/lattice/gold.statistics new file mode 100644 index 00000000..48062f98 --- /dev/null +++ b/tests/system_tests/lattice/gold.statistics @@ -0,0 +1,9 @@ +-lm_nodes 6 +-lm_edges 10 +-lm_paths 5 +-lm_trans ab +-lm_trans -0.225 ++lm_nodes 6 ++lm_edges 10 ++lm_paths 5 ++lm_trans ab diff --git a/tests/system_tests/lattice/gold.stdout b/tests/system_tests/lattice/gold.stdout new file mode 100644 index 00000000..84f8ea78 --- /dev/null +++ b/tests/system_tests/lattice/gold.stdout @@ -0,0 +1,5 @@ +0 ||| ab ||| Cost=0.1;WordPenalty=-0.434294;SourceWordPenalty=-0.434294;LatticeCost=0.125 ||| -1.09359 +0 ||| cb ||| Cost=0.3;WordPenalty=-0.434294;SourceWordPenalty=-0.868589;LatticeCost=2.25 ||| -3.85288 +0 ||| a_b ||| Cost=0.2;WordPenalty=-0.434294;SourceWordPenalty=-0.868589;LatticeCost=2.5 ||| -4.00288 +0 ||| a b ||| Cost=0.3;Glue=1;WordPenalty=-0.868589;SourceWordPenalty=-0.868589;LatticeCost=2.5 ||| -4.53718 +0 ||| a' b ||| Cost=0.3;Glue=1;WordPenalty=-0.868589;SourceWordPenalty=-0.868589;LatticeCost=2.5 ||| -4.53718 diff --git a/tests/system_tests/lattice/input.txt b/tests/system_tests/lattice/input.txt new file mode 100644 index 00000000..e0cd1b57 --- /dev/null +++ b/tests/system_tests/lattice/input.txt @@ -0,0 +1 @@ +((('A',0.5,1),('C',0.25,1),('AB',0.125,2),),(('B',2,1),),) diff --git a/tests/system_tests/lattice/lattice.scfg b/tests/system_tests/lattice/lattice.scfg new file mode 100644 index 00000000..87a72383 --- /dev/null +++ b/tests/system_tests/lattice/lattice.scfg @@ -0,0 +1,6 @@ +[X] ||| A ||| a ||| Cost=0.1 +[X] ||| A ||| a' ||| Cost=0.1 +[X] ||| B ||| b ||| Cost=0.2 +[X] ||| AB ||| ab ||| Cost=0.1 +[X] ||| C B ||| cb ||| Cost=0.3 +[X] ||| A B ||| a_b ||| Cost=0.2 diff --git a/tests/system_tests/lattice/weights b/tests/system_tests/lattice/weights new file mode 100644 index 
00000000..4746ff45 --- /dev/null +++ b/tests/system_tests/lattice/weights @@ -0,0 +1,4 @@ +WordPenalty 1 +SourceWordPenalty 1 +Cost -1 +LatticeCost -1 |