From cec82072a4d65797ed81c34db0bcf0a51af7d027 Mon Sep 17 00:00:00 2001 From: "adam.d.lopez" Date: Fri, 15 Oct 2010 16:32:23 +0000 Subject: Initial outline of decoding chapter git-svn-id: https://ws10smt.googlecode.com/svn/trunk@673 ec762483-ff6d-05da-a07a-a48fb63a330f --- report/biblio.bib | 363 +++++++++++++++++++++++++++++++++++++++++++++-- report/decoding.tex | 117 +++++++++++++++ report/onechap.tex | 54 +++++++ report/prune-tuned.pdf | Bin 0 -> 16731 bytes report/prune-untuned.pdf | Bin 0 -> 16104 bytes report/report.tex | 12 +- report/setup.tex | 2 +- 7 files changed, 531 insertions(+), 17 deletions(-) create mode 100644 report/decoding.tex create mode 100644 report/onechap.tex create mode 100644 report/prune-tuned.pdf create mode 100644 report/prune-untuned.pdf (limited to 'report') diff --git a/report/biblio.bib b/report/biblio.bib index fe0ab538..c7105c59 100644 --- a/report/biblio.bib +++ b/report/biblio.bib @@ -33,7 +33,184 @@ } -@string{acl-1989 = {27th Annual Meeting of the Association for Computational Linguistics (ACL-1989)}} @string{acl-1989-address = {Vancouver, British Columbia, Canada}} @string{acl-1995 = {33rd Annual Meeting of the Association for Computational Linguistics (ACL-1995)}} @string{acl-1995-address = {Cambridge, Massachusetts}} @string{acl-1996 = {34rd Annual Meeting of the Association for Computational Linguistics (ACL-1996)}} @string{acl-1996-address = {Santa Cruz, California}} @string{acl-1997 = {35th Annual Meeting of the Association for Computational Linguistics (ACL-1997)}} @string{acl-1997-address = {Madrid, Spain}} @string{acl-1998 = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics (ACL-CoLing-1998)}} @string{acl-1998-address = {Montreal, Canada}} @string{acl-1999 = {Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics (ACL)}} @string{acl-1999-address = {College Park, Maryland}} @string{acl-2000 = {Proceedings 
of the 40th Annual Meeting of the Association for Computational Linguistics (ACL-2000)}} @string{acl-2000-address = {Hong Kong}} @string{acl-2001 = {Proceedings of the 39th Annual Meeting of the Association for Computational Linguistics (ACL-2001)}} @string{acl-2001-address = {Toulouse, France}} @string{acl-2002 = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL-2002)}} @string{acl-2002-address = {Philadelphia, Pennsylvania}} @string{acl-2003 = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics (ACL-2003)}} @string{acl-2003-address = {Sapporo, Japan}} @string{acl-2004 = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL-2004)}} @string{acl-2004-address = {Barcelona, Spain}} @string{acl-2005 = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL-2005)}} @string{acl-2005-address = {Ann Arbor, Michigan}} @string{acl-2006 = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics (ACL-CoLing-2006)}} @string{acl-2006-address = {Sydney, Australia}} @string{acl-2007 = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics (ACL-2007)}} @string{acl-2007-address = {Prague, Czech Republic}} @string{acl-2008 = {Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}} @string{acl-2008-address = {Colmbus, Ohio}} @string{acl-2009-address = {Singapore}} @string{amta-2002 = {Proceedings of the 5th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2002)}} @string{amta-2002-address = {Tiburon, California}} @string{amta-2004 = {Proceedings of the 6th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2004)}} @string{amta-2004-address = {Washington DC}} 
@string{amta-2006 = {Proceedings of the 7th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2006)}} @string{amta-2006-address = {Cambridge, Massachusetts}} @string{amta-2008 = {Proceedings of the 8th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2008)}} @string{amta-2008-address = {Honolulu, Hawaii}} @string{coling-2008 = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING-2008)}} @string{coling-2008-address = {Manchester, England}} @string{eacl-1989 = {4th Conference of the European Chapter of the Association for Computational Linguistics (EACL-1989)}} @string{eacl-1989-address = {Manchester, England}} @string{eacl-2003 = {10th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2003)}} @string{eacl-2003-address = {Budapest, Hungary}} @string{eacl-2006 = {11th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2006)}} @string{eacl-2006-address = {Trento, Italy}} @string{eacl-2009 = {12th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2009)}} @string{eacl-2009-address = {Athens, Greece}} @string{emnlp-2000 = {2000 Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora}} @string{emnlp-2000-address = {Hong Kong}} @string{emnlp-2001 = {Proceedings of the 2001 Conference on Empirical Methods in Natural Language Processing (EMNLP-2001)}} @string{emnlp-2001-address = {Pittsburgh, Pennsylvania}} @string{emnlp-2002 = {Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP-2002)}} @string{emnlp-2002-address = {Philadelphia, Pennsylvania}} @string{emnlp-2003 = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing (EMNLP-2003)}} @string{emnlp-2003-address = {Sapporo, Japan}} @string{emnlp-2004 = {Proceedings of the 2004 Conference on 
Empirical Methods in Natural Language Processing (EMNLP-2004)}} @string{emnlp-2004-address = {Barcelona, Spain}} @string{emnlp-2005 = {Proceedings of the 2005 Conference on Empirical Methods in Natural Language Processing (EMNLP-2005)}} @string{emnlp-2005-address = {Vancouver, British Columbia., Canada}} @string{emnlp-2006 = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing (EMNLP-2006)}} @string{emnlp-2006-address = {Sydney, Australia}} @string{emnlp-2007 = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}} @string{emnlp-2007-address = {Prague, Czech Republic}} @string{emnlp-2008 = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing (EMNLP-2008)}} @string{emnlp-2008-address = {Honolulu, Hawaii}} @string{emnlp-2009 = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP-2009)}} @string{emnlp-2009-address = {Singapore}} @string{hlt-2002 = {Proceedings of Second International Conference on Human Language Technology Research (HLT-02)}} @string{hlt-2002-address = {San Diego}} @string{hlt-naacl-2003 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2003)}} @string{hlt-naacl-2003-address = {Edmonton, Alberta}} @string{hlt-naacl-2004 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2004)}} @string{hlt-naacl-2004-address = {Boston, Massachusetts}} @string{hlt-naacl-2006 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2006)}} @string{hlt-naacl-2006-address = {New York, New York}} @string{hlt-naacl-2007 = {Proceedings of the Human Language Technology Conference of 
the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2007)}} @string{hlt-naacl-2007-address = {Rochester, New York}} @string{hlt-naacl-2009-address = {Boulder, Colorado}} @string{iwpt = {Proceedings of the International Workshop on Parsing Technologies}} @string{iwpt-2005-address = {Vancouver, BC, Canada}} @string{iwslt = {Proceedings of the International Workshop on Spoken Language Technology}} @string{kdd = {Proceeding of the ACM SIGKDD international conference on Knowledge discovery and data mining}} @string{kdd-2008-address = {New York}} @string{mt-summit-9-address = {New Orleans, Louisiana}} @string{naacl-2001 = {Second Meeting of the North American Chapter of the Association for Computational Linguistics}} @string{naacl-2001-address = {Pittsburgh, Pennsylvania}} @string{wmt = {Proceedings of the Workshop on Statistical Machine Translation}} +@string{acl-1989 = {27th Annual Meeting of the Association for Computational Linguistics (ACL-1989)}} + +@string{acl-1989-address = {Vancouver, British Columbia, Canada}} + +@string{acl-1995 = {33rd Annual Meeting of the Association for Computational Linguistics (ACL-1995)}} + +@string{acl-1995-address = {Cambridge, Massachusetts}} + +@string{acl-1996 = {34th Annual Meeting of the Association for Computational Linguistics (ACL-1996)}} + +@string{acl-1996-address = {Santa Cruz, California}} + +@string{acl-1997 = {35th Annual Meeting of the Association for Computational Linguistics (ACL-1997)}} + +@string{acl-1997-address = {Madrid, Spain}} + +@string{acl-1998 = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics (ACL-CoLing-1998)}} + +@string{acl-1998-address = {Montreal, Canada}} + +@string{acl-1999 = {Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics (ACL)}} + +@string{acl-1999-address = {College Park, Maryland}} + +@string{acl-2000 = {Proceedings of the 38th Annual 
Meeting of the Association for Computational Linguistics (ACL-2000)}} + +@string{acl-2000-address = {Hong Kong}} + +@string{acl-2001 = {Proceedings of the 39th Annual Meeting of the Association for Computational Linguistics (ACL-2001)}} + +@string{acl-2001-address = {Toulouse, France}} + +@string{acl-2002 = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL-2002)}} + +@string{acl-2002-address = {Philadelphia, Pennsylvania}} + +@string{acl-2003 = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics (ACL-2003)}} + +@string{acl-2003-address = {Sapporo, Japan}} + +@string{acl-2004 = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL-2004)}} + +@string{acl-2004-address = {Barcelona, Spain}} + +@string{acl-2005 = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL-2005)}} + +@string{acl-2005-address = {Ann Arbor, Michigan}} + +@string{acl-2006 = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics (ACL-CoLing-2006)}} + +@string{acl-2006-address = {Sydney, Australia}} + +@string{acl-2007 = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics (ACL-2007)}} + +@string{acl-2007-address = {Prague, Czech Republic}} + +@string{acl-2008 = {Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}} + +@string{acl-2008-address = {Columbus, Ohio}} + +@string{acl-2009-address = {Singapore}} + +@string{amta-2002 = {Proceedings of the 5th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2002)}} + +@string{amta-2002-address = {Tiburon, California}} + +@string{amta-2004 = {Proceedings of the 6th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2004)}} + 
+@string{amta-2004-address = {Washington DC}} + +@string{amta-2006 = {Proceedings of the 7th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2006)}} + +@string{amta-2006-address = {Cambridge, Massachusetts}} + +@string{amta-2008 = {Proceedings of the 8th Biennial Conference of the Association for Machine Translation in the Americas (AMTA-2008)}} + +@string{amta-2008-address = {Honolulu, Hawaii}} + +@string{coling-2008 = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING-2008)}} + +@string{coling-2008-address = {Manchester, England}} + +@string{eacl-1989 = {4th Conference of the European Chapter of the Association for Computational Linguistics (EACL-1989)}} + +@string{eacl-1989-address = {Manchester, England}} + +@string{eacl-2003 = {10th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2003)}} + +@string{eacl-2003-address = {Budapest, Hungary}} + +@string{eacl-2006 = {11th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2006)}} + +@string{eacl-2006-address = {Trento, Italy}} + +@string{eacl-2009 = {12th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2009)}} + +@string{eacl-2009-address = {Athens, Greece}} + +@string{emnlp-2000 = {2000 Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora}} + +@string{emnlp-2000-address = {Hong Kong}} + +@string{emnlp-2001 = {Proceedings of the 2001 Conference on Empirical Methods in Natural Language Processing (EMNLP-2001)}} + +@string{emnlp-2001-address = {Pittsburgh, Pennsylvania}} + +@string{emnlp-2002 = {Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP-2002)}} + +@string{emnlp-2002-address = {Philadelphia, Pennsylvania}} + +@string{emnlp-2003 = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing 
(EMNLP-2003)}} + +@string{emnlp-2003-address = {Sapporo, Japan}} + +@string{emnlp-2004 = {Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing (EMNLP-2004)}} + +@string{emnlp-2004-address = {Barcelona, Spain}} + +@string{emnlp-2005 = {Proceedings of the 2005 Conference on Empirical Methods in Natural Language Processing (EMNLP-2005)}} + +@string{emnlp-2005-address = {Vancouver, British Columbia, Canada}} + +@string{emnlp-2006 = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing (EMNLP-2006)}} + +@string{emnlp-2006-address = {Sydney, Australia}} + +@string{emnlp-2007 = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)}} + +@string{emnlp-2007-address = {Prague, Czech Republic}} + +@string{emnlp-2008 = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing (EMNLP-2008)}} + +@string{emnlp-2008-address = {Honolulu, Hawaii}} + +@string{emnlp-2009 = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP-2009)}} + +@string{emnlp-2009-address = {Singapore}} + +@string{hlt-2002 = {Proceedings of Second International Conference on Human Language Technology Research (HLT-02)}} + +@string{hlt-2002-address = {San Diego}} + +@string{hlt-naacl-2003 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2003)}} + +@string{hlt-naacl-2003-address = {Edmonton, Alberta}} + +@string{hlt-naacl-2004 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2004)}} + +@string{hlt-naacl-2004-address = {Boston, Massachusetts}} + +@string{hlt-naacl-2006 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for 
Computational Linguistics (HLT/NAACL-2006)}} + +@string{hlt-naacl-2006-address = {New York, New York}} + +@string{hlt-naacl-2007 = {Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2007)}} + +@string{hlt-naacl-2007-address = {Rochester, New York}} + +@string{hlt-naacl-2009-address = {Boulder, Colorado}} + +@string{iwpt = {Proceedings of the International Workshop on Parsing Technologies}} + +@string{iwpt-2005-address = {Vancouver, BC, Canada}} + +@string{iwslt = {Proceedings of the International Workshop on Spoken Language Technology}} + +@string{kdd = {Proceeding of the ACM SIGKDD international conference on Knowledge discovery and data mining}} + +@string{kdd-2008-address = {New York}} + +@string{mt-summit-9-address = {New Orleans, Louisiana}} + +@string{naacl-2001 = {Second Meeting of the North American Chapter of the Association for Computational Linguistics}} + +@string{naacl-2001-address = {Pittsburgh, Pennsylvania}} + +@string{wmt = {Proceedings of the Workshop on Statistical Machine Translation}} + @inproceedings{Chiang2005, address = acl-2005-address, @@ -42,25 +219,95 @@ title = {A Hierarchical Phrase-based Model for Statistical Machine Translation}, year = {2005}} - @inproceedings{Koehn2003, Address = hlt-naacl-2003-address, Author = {Philipp Koehn and Franz Josef Och and Daniel Marcu}, Booktitle = {{Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2003)}}, Title = {Statistical Phrase-Based Translation}, Url = {http://www.isi.edu/~koehn/publications/phrase2003.pdf}, Year = {2003}} - @inproceedings{Koehn2004, Address = amta-2004-address, Author = {Philipp Koehn}, Booktitle = amta-2004, Title = {Pharaoh: A Beam Search Decoder for Phrase-Based Statistical Machine Translation Models}, Url = 
{http://www.iccs.informatics.ed.ac.uk/~pkoehn/publications/pharaoh-amta2004.pdf}, Year = {2004}} +@inproceedings{Koehn2003, + Address = hlt-naacl-2003-address, + Author = {Philipp Koehn and Franz Josef Och and Daniel Marcu}, + Booktitle = {{Proceedings of the Human Language Technology Conference of the North American chapter of the Association for Computational Linguistics (HLT/NAACL-2003)}}, + Title = {Statistical Phrase-Based Translation}, + Url = {http://www.isi.edu/~koehn/publications/phrase2003.pdf}, + Year = {2003}} + + +@inproceedings{Koehn2004, + Address = amta-2004-address, + Author = {Philipp Koehn}, + Booktitle = amta-2004, + Title = {Pharaoh: A Beam Search Decoder for Phrase-Based Statistical Machine Translation Models}, + Url = {http://www.iccs.informatics.ed.ac.uk/~pkoehn/publications/pharaoh-amta2004.pdf}, + Year = {2004}} + + +@inproceedings{Tillmann2003, + Address = emnlp-2003-address, + Author = {Christoph Tillmann}, + Booktitle = emnlp-2003, + Title = {A Projection Extension Algorithm for Statistical Machine Translation}, + Url = {http://acl.ldc.upenn.edu/W/W03/W03-1001.pdf}, + Year = {2003}} + + + +@inproceedings{Venugopal2003, + Address = acl-2003-address, + Author = {Ashish Venugopal and Stephan Vogel and Alex Waibel }, + Booktitle = acl-2003, + Title = {Effective Phrase Translation Extraction from Alignment Models}, + Year = {2003}} + + +@inproceedings{Galley2004, + Address = hlt-naacl-2004-address, + Author = {Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel}, + Booktitle = hlt-naacl-2004, + Title = {What's in a translation rule?}, + Url = {http://aclweb.org/anthology-new/N/N04/N04-1035.bib}, + Year = {2004}} - @inproceedings{Tillmann2003, Address = emnlp-2003-address, Author = {Christoph Tillmann}, Booktitle = emnlp-2003, Title = {A Projection Extension Algorithm for Statistical Machine Translation}, Url = {http://acl.ldc.upenn.edu/W/W03/W03-1001.pdf}, Year = {2003}} - @inproceedings{Venugopal2003, Address = 
acl-2003-address, Author = {Ashish Venugopal and Stephan Vogel and Alex Waibel }, Booktitle = acl-2003, Title = {Effective Phrase Translation Extraction from Alignment Models}, Year = {2003}} +@article{ccg1982, + Author = {Anthony Ades and Mark Steedman}, + Journal = {Linguistics and Philosophy}, + Pages = {517-558}, + Title = {On the Order of Words}, + Volume = {4}, + Year = {1982}} - @inproceedings{Galley2004, Address = hlt-naacl-2004-address, Author = {Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel}, Booktitle = hlt-naacl-2004, Title = {What's in a translation rule?}, Url = {http://aclweb.org/anthology-new/N/N04/N04-1035.bib}, Year = {2004}} - @article{ccg1982, Author = {Anthony Ades and Mark Steedman}, Journal = {Linguistics and Philosophy}, Pages = {517-558}, Title = {On the Order of Words}, Volume = {4}, Year = {1982}} + +@inproceedings{Charniak1997, + Author = {Eugene Charniak}, + Booktitle = {Proceedings of AAAI}, + Title = {Statistical parsing with a context-free grammar and word statistics}, + Year = {1997}} + +@inproceedings{Collins1996, + Author = {Michael Collins}, + Booktitle = {Proceedings of ACL}, + Title = {A New Statistical Parser Based on Bigram Lexical Dependencies}, + Year = {1996}} + + +@article{Marcus1993, + author = {Mitchell P. 
Marcus and Mary Ann Marcinkiewicz and Beatrice Santorini}, + title = {Building a Large Annotated Corpus of {E}nglish: The Penn Treebank}, + journal = {Computational Linguistics}, + volume = {19}, + number = {2}, + year = {1993} +} + +@techreport{SCALE-report, + author = { Kathy Baker and Steven Bethard and Michael Bloodgood and Ralf Brown and Chris Callison-Burch and Glen Coppersmith and Bonnie Dorr and Wes Filardo and Kendall Giles and Anni Irvine and Mike Kayser and Lori Levin and Justin Martineau and Jim Mayfield and Scott Miller and Aaron Phillips and Andrew Philpot and Christine Piatko and Lane Schwartz and David Zajic }, + title = {Semantically Informed Machine Translation ({SIMT})}, + institution = {Human Language Technology Center Of Excellence}, + type = {{SCALE} Summer Workshop Final Report}, + year = {2009} +} - @inproceedings{Charniak1997, Author = {Eugene Charniak}, Booktitle = {Proceedings of AAAI}, Title = {Statistical parsing with a context-free grammar and word statistics}, Year = {1997}} - @inproceedings{Collins1996, Author = {Michael Collins}, Booktitle = {Proceedings of ACL}, Title = {A New Statistical Parser Based on Bigram Lexical Dependencies}, Year = {1996}} - @article{Marcus1993, author = {Mitchell P. 
Marcus and Mary Ann Marcinkiewicz and Beatrice Santorini}, title = {Building a Large Annotated Corpus of {E}nglish: The Penn Treebank}, journal = {Computational Linguistics}, volume = {19}, number = {2}, year = {1993} } - @techreport{SCALE-report, author = { Kathy Baker and Steven Bethard and Michael Bloodgood and Ralf Brown and Chris Callison-Burch and Glen Coppersmith and Bonnie Dorr and Wes Filardo and Kendall Giles and Anni Irvine and Mike Kayser and Lori Levin and Justin Martineau and Jim Mayfield and Scott Miller and Aaron Phillips and Andrew Philpot and Christine Piatko and Lane Schwartz and David Zajic }, title = {Semantically Informed Machine Translation ({SIMT})}, institution = {Human Language Technology Center Of Excellence}, type = {{SCALE} Summer Workshop Final Report}, year = {2009} } @article{chiang:2007, Author = {David Chiang}, @@ -71,7 +318,7 @@ Volume = {33}, Year = {2007}} - + @inproceedings{blunsom:2009, author = {Blunsom, Phil and Cohn, Trevor and Goldwater, Sharon and Johnson, Mark}, @@ -269,3 +516,95 @@ publisher = {Association for Computational Linguistics}, pages = {104--111}, } +@inproceedings{Huang+Chiang:2007:acl, + Author = {Liang Huang and David Chiang}, + Booktitle = {Proc. 
of ACL}, + Location = {Prague}, + Month = {Jun}, + Pages = {144--151}, + Title = {Forest Rescoring: Faster Decoding with Integrated Language Models}, + Year = {2007}} + +@InProceedings{dyer-EtAl:2010:Demos, + author = {Dyer, Chris and Lopez, Adam and Ganitkevitch, Juri and Weese, Jonathan and Ture, Ferhan and Blunsom, Phil and Setiawan, Hendra and Eidelman, Vladimir and Resnik, Philip}, + title = {cdec: A Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models}, + booktitle = {Proceedings of the ACL 2010 System Demonstrations}, + month = {July}, + year = {2010}, + address = {Uppsala, Sweden}, + publisher = {Association for Computational Linguistics}, + pages = {7--12}, + url = {http://www.aclweb.org/anthology/P10-4002} +} + +@InProceedings{hopkins-langmead:2009:EMNLP, + author = {Hopkins, Mark and Langmead, Greg}, + title = {Cube Pruning as Heuristic Search}, + booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing}, + month = {August}, + year = {2009}, + address = {Singapore}, + publisher = {Association for Computational Linguistics}, + pages = {62--71}, + url = {http://www.aclweb.org/anthology/D/D09/D09-1007} +} + +@InProceedings{venugopal-zollmann-stephan:2007:main, + author = {Venugopal, Ashish and Zollmann, Andreas and Vogel, Stephan}, + title = {An Efficient Two-Pass Approach to Synchronous-{CFG} Driven Statistical {MT}}, + booktitle = {Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference}, + month = {April}, + year = {2007}, + address = {Rochester, New York}, + publisher = {Association for Computational Linguistics}, + pages = {500--507}, + url = {http://www.aclweb.org/anthology/N/N07/N07-1063} +} + +@InProceedings{denero-EtAl:2009:NAACLHLT09, + author = {DeNero, John and Bansal, Mohit and Pauls, Adam and Klein, Dan}, + title = {Efficient Parsing for Transducer 
Grammars}, + booktitle = {Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics}, + month = {June}, + year = {2009}, + address = {Boulder, Colorado}, + publisher = {Association for Computational Linguistics}, + pages = {227--235}, + url = {http://www.aclweb.org/anthology/N/N09/N09-1026} +} + +@InProceedings{denero-pauls-klein:2009:Short, + author = {DeNero, John and Pauls, Adam and Klein, Dan}, + title = {Asynchronous Binarization for Synchronous Grammars}, + booktitle = {Proceedings of the ACL-IJCNLP 2009 Conference Short Papers}, + month = {August}, + year = {2009}, + address = {Suntec, Singapore}, + publisher = {Association for Computational Linguistics}, + pages = {141--144}, + url = {http://www.aclweb.org/anthology/P/P09/P09-2036} +} + +@InProceedings{iglesias-EtAl:2009:NAACLHLT09, + author = {Iglesias, Gonzalo and de Gispert, Adri\`{a} and R. Banga, Eduardo and Byrne, William}, + title = {Hierarchical Phrase-Based Translation with Weighted Finite State Transducers}, + booktitle = {Proceedings NAACL}, + month = {June}, + year = {2009}, + address = {Boulder, Colorado}, + publisher = {Association for Computational Linguistics}, + pages = {433--441}, + url = {http://www.aclweb.org/anthology/N/N09/N09-1049} +} + +@Inproceedings{hopkins-langmead:2010:EMNLP, + author = {Mark Hopkins and Greg Langmead}, + title = {{SCFG} Decoding Without Binarization}, + booktitle = {Proceedings of EMNLP}, + year = {2010} +} + + + + + diff --git a/report/decoding.tex b/report/decoding.tex new file mode 100644 index 00000000..9a4d8d9e --- /dev/null +++ b/report/decoding.tex @@ -0,0 +1,117 @@ +\chapter{Decoding Induced Grammars} +\label{chap:decoding} + + +So far this report has described several techniques for inducing synchronous grammars from text and shown that some of them lead to improvements in translation quality. 
Unfortunately, these improvements come at a cost: decoding with the induced grammars is more computationally expensive than decoding with the Hiero baseline grammars. Therefore, part of the workshop was devoted to research on efficient decoding techniques for induced grammars, the focus of this chapter. + +A simple example from our experimental setup illustrates the problem. On the Urdu-English task (\textsection\ref{sec:datasets}), the baseline Hiero grammar achieves a BLEU score of 20.8, while an induced 25-category grammar scores 21.7, a substantial improvement. However, using the same decoding algorithm, the Hiero grammar requires only 3.0 seconds to decode each sentence, while the 25-category grammar requires 52 seconds -- an order of magnitude slower. The prospects for decoding with grammars induced from larger data using more categories are even worse. + +Why is decoding so slow with these grammars? We first answer this question from a theoretical perspective (\textsection\ref{sec:overview}), and then measure the opportunity for improvement with the help of an oracle experiment (\textsection\ref{sec:oracle}). Finally we describe our contributions towards efficient decoding for these grammars, which include coarse-to-fine decoding (\textsection\ref{sec:ctf}) and grammar pruning techniques (\textsection\ref{sec:pruning}). + +\section{Decoding with Synchronous Context-free Grammars} \label{sec:overview} + +Informally, decoding with any synchronous context-free grammar is accomplished by first parsing the source sentence with the source projection of the grammar, and then reading off the string produced by the corresponding target parse. To illustrate this, suppose that we have the following grammar. 
+\begin{align*} + S &\rightarrow NP_1~VP_2 / NP_1~VP_2 \\ + NP &\rightarrow \textrm{ watashi wa } / \textrm{ I }\\ + NP &\rightarrow \textrm{ hako wo } / \textrm{ the box }\\ + VP &\rightarrow NP_1~V_2 / V_1~NP_2 \\ + V &\rightarrow \textrm{ akemasu } / \textrm{ open } +\end{align*} +We show a parse of the source sentence {\it watashi wa hako wo akemasu} under the source projection of this grammar below.\footnote{This example is borrowed from ???} +\begin{center} + \begin{tikzpicture}[edge from parent path= {(\tikzparentnode.south) -- (\tikzchildnode.north)}] + \node{S}[sibling distance=1in] + child {node {NP}[sibling distance=0.5in] + child {node {watashi}} + child {node {wa}}} + child {node {VP}[sibling distance=0.75in] + child {node {NP}[sibling distance=0.5in] + child {node {hako}} + child {node {wo}}} + child {node {V} + child {node {akemasu}}}}; + \end{tikzpicture} +\end{center} +This parse implies a corresponding unique target parse tree that is isomorphic to the source parse up to the ordering of child nonterminals and the identity and ordering of terminals, as illustrated below. All that is needed to obtain the translation is to read off the target terminal string at the leaves of the tree. 
+\begin{center} + \begin{tikzpicture}[edge from parent path= {(\tikzparentnode.south) -- (\tikzchildnode.north)}] + \node(S1a) at (0,0){S}[sibling distance=1in] + child {node(NP2a) {NP}[sibling distance=0.5in] + child {node {watashi}} + child {node {wa}}} + child {node(VP3a) {VP}[sibling distance=0.75in] + child {node (NP4a) {NP}[sibling distance=0.5in] + child {node {hako}} + child {node {wo}}} + child {node(V5a) {V} + child {node {akemasu}}}}; + \node (S1b) at (2.5in,0) {S}[sibling distance=1in] + child {node (NP2b) {NP}[sibling distance=0.5in] + child {node {I}}} + child {node (VP3b) {VP}[sibling distance=0.75in] + child {node (V5b) {V} + child {node {open}}} + child {node (NP4b) {NP}[sibling distance=0.5in] + child {node {the}} + child {node {box}}}}; + \draw[->,dotted] (S1a) -- (S1b); + \draw[->,dotted] (NP2a) ..controls +(1.25in,0.25in) .. (NP2b); + \draw[->,dotted] (VP3a) ..controls +(1.25in,-0.25in) .. (VP3b); + \draw[->,dotted] (NP4a) ..controls +(1.625in,0.25in) .. (NP4b); + \draw[->,dotted] (V5a) ..controls +(0.875in,-0.25in) .. (V5b); + \end{tikzpicture} +\end{center} +In general, however, there will be more than one parse of a given sentence -- indeed, there may be exponentially many parses. The set of all possible parses can be computed efficiently using dynamic programming in the form of the CKY algorithm or its variants (we use CKY+). This produces a {\it hypergraph} (or {\it packed chart}) containing all parses, which requires at most $O(Gn^3)$ time to construct. The value $G$ is a grammar constant tied to properties of the grammar -- as a loose bound, it will be at least cubic in the size of the set of nonterminal symbols. Consequently, a key problem is that {\bf parsing time is strongly correlated with the number of nonterminal symbols in the grammar}. This is why parsing with 25 categories is an order of magnitude slower than parsing with two categories as in the baseline. 
+ +Of course, parsing the source does not completely solve the translation problem, as in our translation models we need to consider the probability of each string under an $n$-gram language model. This intersection carries with it even worse computational overhead. A common approach to decoding with synchronous grammars is the {\it two-pass} strategy. +\begin{enumerate} + \item The source sentence is {\it exhaustively} parsed to produce a packed chart (called the {\it -LM hypergraph}). + \item This chart, itself a context-free grammar encoding all possible target sentences, is then intersected with an $n$-gram language model. The search space for this intersection is a much larger hypergraph (called the {\it +LM hypergraph}), constructed with heavy pruning to counteract complexity effects. +\end{enumerate} +Although some algorithms in the literature are only described implicitly in these terms, most of them follow this strategy \citep{Chiang2005,Huang+Chiang:2007:acl,venugopal-zollmann-stephan:2007:main,denero-EtAl:2009:NAACLHLT09,denero-pauls-klein:2009:Short,hopkins-langmead:2009:EMNLP,hopkins-langmead:2010:EMNLP,iglesias-EtAl:2009:NAACLHLT09}. The most popular of these, the {\it cube pruning} algorithm of \citet{Chiang2005}, is the baseline algorithm implemented in our decoder \citep{dyer-EtAl:2010:Demos}, making it a natural starting point for our exploration. The amount of work done in cube pruning is a constant multiplier times the size of the -LM hypergraph, so reducing its size is the key to producing faster two-pass algorithms. Therefore, we will consider approaches that construct a pruned -LM hypergraph. + +\section{Oracle Experiments}\label{sec:oracle} + +At the outset, we wondered what effect substituting a pruned -LM hypergraph for the full -LM hypergraph would have on translation accuracy. To measure this, we ran an oracle experiment. +\begin{enumerate} + \item Produce the complete -LM hypergraph. 
+ \item Prune the -LM hypergraph using inside-outside pruning. + \item Integrate the language model on the pruned -LM hypergraph via cube pruning. +\end{enumerate} +The idea behind inside-outside pruning is simple: since we have access to the complete chart, we can calculate the ratio between the score of the highest-scoring parse passing through any particular hyperedge and the score of the globally highest-scoring parse. We prune the hyperedge if this ratio falls below some pruning threshold. By varying the threshold, we retain -LM hypergraphs of different sizes. + +An important point is that the -LM hypergraph incorporates only local features from the translation model and not the non-local language model feature. This has two consequences. First, it is possible to prune away the parse yielding the globally optimal translation under the combined model. Second, the ranking of parses under the translation model features is likely to be very poor. Nonetheless, it is important to recognize that the experiment is still idealized because it incorporates information from the full -LM hypergraph during pruning, and this information will not be available to us in practical methods that must avoid building the full -LM hypergraph altogether. + +The results of an initial oracle experiment are shown in Figure~\ref{figure:oracle-untuned}. These initial results are discouraging: we find that it is only possible to prune between twenty and thirty percent of the -LM hypergraph without incurring penalties in downstream BLEU score. + +\begin{figure} + \includegraphics[scale=0.5]{prune-untuned} + \caption{Oracle experiment: final BLEU score vs. percent of -LM forest retained after an initial inside-outside pruning pass. \label{figure:oracle-untuned}} +\end{figure} + +This initial result was disappointing, so we considered a different oracle approximation. 
In this case, we first tuned the local features using minimum error rate training, and used the resulting set of feature weights for inside-outside pruning. This approximation turns out to produce a much better inside-outside pruning of the first-pass hypergraph. The results show (Figure~\ref{figure:oracle-tuned}) that it is possible to discard much more of this hypergraph before the second pass without harming overall accuracy -- possibly as much as three quarters of it. This result shows that heavy pruning of a -LM forest might be a means of improving decoding speed without harming translation accuracy. + +\begin{figure} + \includegraphics[scale=0.5]{prune-tuned} + \caption{Second oracle experiment: final BLEU score vs. percent of -LM forest retained after an initial inside-outside pruning pass using weights tuned for -LM decoding. \label{figure:oracle-tuned}} +\end{figure} + +\section{Coarse-to-Fine Decoding}\label{sec:ctf} + +Motivated by our oracle experiment, we implemented {\it coarse-to-fine parsing} to generate the -LM hypergraph. The idea behind the algorithm is simple: we first parse the source sentence using a projected grammar with a much lower grammar constant (the coarse grammar), which can be done very quickly. The resulting hypergraph is pruned using inside-outside, and we then parse the sentence again with the true grammar (the fine grammar), limiting the parse only to those edges whose projection appears in the pruned coarse hypergraph. If the original parse is sufficiently pruned, then this second pass will take much less time than an exhaustive parse with the same grammar. + +The first decision we need to make is how to obtain the coarse grammar. Many strategies are possible; for our experiments we chose to simply map all nonterminals to X. 
In essence, this means that we are decoding with the baseline Hiero grammar; although \citet{denero-EtAl:2009:NAACLHLT09} found that this simple strategy was ineffective for grammars estimated using parsers, the strategy makes intuitive sense for our induced grammars since they are generated by decorating Hiero grammars with finer-grained categories. As shown in Figure~\ref{fig:ctf}, we found that by varying the pruning parameter, we obtained different tradeoffs in speed and accuracy. Most encouraging, however, was the fact that we were able to reduce decoding time by nearly 40\% without any reduction in overall translation accuracy. + +An implementation detail that was important to our experiments was re-parsing. A subtle feature of coarse-to-fine parsing is that some coarse parses do not correspond to any parse in the fine grammar -- this is a consequence of the coarse grammar conflating two otherwise distinct nonterminal symbols. We discovered that a fairly common failure case for our parser occurred when all coarse parses corresponding to true fine parses were pruned away in the first pass. To work around this problem, whenever a fine parse was not found, we widened the beam and repruned the coarse hypergraph more conservatively. + +\section{Grammar Pruning}\label{sec:pruning} + +An alternative means of reducing the overhead of parsing is to prune the translation grammar. 
Although a pruned grammar will still retain the same number of nonterminal symbols, parsing time may be substantially improved if there are simply fewer + +\section{Discussion} + + + + + diff --git a/report/onechap.tex b/report/onechap.tex new file mode 100644 index 00000000..e055028c --- /dev/null +++ b/report/onechap.tex @@ -0,0 +1,54 @@ +%!TEX TS-program = pdflatex +%!TEX encoding = UTF-8 Unicode + +\documentclass[11pt]{report} +\usepackage{graphicx} +\usepackage{index} +\usepackage{varioref} +\usepackage{amsmath} +\usepackage{multirow} +\usepackage{theorem} % for examples +\usepackage{alltt} +\usepackage{ulem} +\usepackage{epic,eepic} +\usepackage{boxedminipage} +\usepackage{fancybox} +\usepackage{colortbl} +\usepackage[square]{natbib} +\usepackage{epsfig} +%\usepackage{subfig} +\usepackage{subfigure} +\usepackage{booktabs} +\usepackage{tikz} + +%\usepackage[encapsulated]{CJK} +%\usepackage{ucs} +\usepackage[utf8x]{inputenc} +% use one of bsmi(trad Chinese), gbsn(simp Chinese), min(Japanese), mj(Korean); see: +% /usr/share/texmf-dist/tex/latex/cjk/texinput/UTF8/*.fd +%\newcommand{\cntext}[1]{\begin{CJK}{UTF8}{gbsn}#1\end{CJK}} +\newcommand{\cntext}[1]{} + +\oddsidemargin 0mm +\evensidemargin 5mm +\topmargin -20mm +\textheight 240mm +\textwidth 160mm + + + +\newcommand{\bold}{\it} +\renewcommand{\emph}{\it} + +\makeindex +\theoremstyle{plain} + +\newcommand{\nt}[2]{\textrm{#1}_{\framebox[5pt]{\scriptsize #2}}} +\newcommand{\ind}[1]{{\fboxsep1pt\raisebox{-.5ex}{\fbox{{\tiny #1}}}}} + +\begin{document} + \include{decoding} + \bibliographystyle{apalike} + \bibliography{biblio} + \printindex +\end{document} diff --git a/report/prune-tuned.pdf b/report/prune-tuned.pdf new file mode 100644 index 00000000..02666e11 Binary files /dev/null and b/report/prune-tuned.pdf differ diff --git a/report/prune-untuned.pdf b/report/prune-untuned.pdf new file mode 100644 index 00000000..03897082 Binary files /dev/null and b/report/prune-untuned.pdf differ diff --git 
a/report/report.tex b/report/report.tex index 08b1b49d..5c37992e 100755 --- a/report/report.tex +++ b/report/report.tex @@ -1,3 +1,6 @@ +%!TEX TS-program = pdflatex +%!TEX encoding = UTF-8 Unicode + \documentclass[11pt]{report} \usepackage{graphicx} \usepackage{index} @@ -17,13 +20,13 @@ \usepackage{subfigure} \usepackage{booktabs} -\usepackage[encapsulated]{CJK} -\usepackage{ucs} +%\usepackage[encapsulated]{CJK} +%\usepackage{ucs} \usepackage[utf8x]{inputenc} % use one of bsmi(trad Chinese), gbsn(simp Chinese), min(Japanese), mj(Korean); see: % /usr/share/texmf-dist/tex/latex/cjk/texinput/UTF8/*.fd -\newcommand{\cntext}[1]{\begin{CJK}{UTF8}{gbsn}#1\end{CJK}} - +%\newcommand{\cntext}[1]{\begin{CJK}{UTF8}{gbsn}#1\end{CJK}} +\newcommand{\cntext}[1]{} \oddsidemargin 0mm \evensidemargin 5mm @@ -110,6 +113,7 @@ We especially would like to thank Fred Jelinek for heading the Summer School eff \include{np_clustering} \include{morphology/morphology} \include{pr-clustering/posterior} +\include{decoding} \include{training} \bibliographystyle{apalike} diff --git a/report/setup.tex b/report/setup.tex index e3357d3c..eb655a77 100644 --- a/report/setup.tex +++ b/report/setup.tex @@ -107,7 +107,7 @@ The notation used in the remainder of the paper for describing the clustering mo Brief overview of the pipeline, including phrase-extraction. -\section{Data sets} +\section{Data sets}\label{sec:datasets} \section{Evaluation} -- cgit v1.2.3