From 713f8e46b44bb0f72c03fb22e27589d5dab98ad0 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Sun, 18 Nov 2012 15:36:16 -0500 Subject: Add Python tests --- python/tests/test_decoder.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 python/tests/test_decoder.py (limited to 'python/tests') diff --git a/python/tests/test_decoder.py b/python/tests/test_decoder.py new file mode 100644 index 00000000..a74e6268 --- /dev/null +++ b/python/tests/test_decoder.py @@ -0,0 +1,41 @@ +#coding:utf8 +import os +import gzip +import cdec +import unittest +from nose.tools import assert_almost_equals, assert_equal + +weights = os.path.dirname(__file__)+'/../../tests/system_tests/australia/weights' +ref_weights = {'WordPenalty': -2.844814, 'LanguageModel': 1.0, 'PhraseModel_0': -1.066893, 'PhraseModel_1': -0.752247, 'PhraseModel_2': -0.589793, 'PassThrough': -20.0, 'Glue': 0} + +grammar_file = os.path.dirname(__file__)+'/../../tests/system_tests/australia/australia.scfg.gz' + +input_sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。' +ref_output_sentence = u'australia is have diplomatic relations with north korea one of the few countries .' +ref_f_tree = u'(S (S (S (S (X 澳洲 是)) (X (X 与 北韩) 有 邦交)) (X 的 少数 国家 之一)) (X 。))' +ref_e_tree = u'(S (S (S (S (X australia is)) (X have diplomatic relations (X with north korea))) (X one of the few countries)) (X .))' +ref_fvector = {'PhraseModel_2': 7.082652, 'Glue': 3.0, 'PhraseModel_0': 2.014353, 'PhraseModel_1': 8.591477} + +def assert_fvector_equal(vec, ref): + vecd = dict(vec) + assert_equal(set(vecd.keys()), set(ref.keys())) + for k, v in ref.items(): + assert_almost_equals(vec[k], v, 6) + +class TestDecoder(unittest.TestCase): + def setUp(self): + self.decoder = cdec.Decoder(formalism='scfg') + self.decoder.read_weights(weights) + with gzip.open(grammar_file) as f: + self.grammar = f.read() + + def test_weights(self): + assert_fvector_equal(self.decoder.weights, ref_weights) + + def test_translate(self): + forest = self.decoder.translate(input_sentence, grammar=self.grammar) + assert_equal(forest.viterbi(), ref_output_sentence) + f_tree, e_tree = forest.viterbi_trees() + assert_equal(f_tree, ref_f_tree) + assert_equal(e_tree, ref_e_tree) + assert_fvector_equal(forest.viterbi_features(), ref_fvector) -- cgit v1.2.3 From d094258e7cb75057af55eca41b65ba74fe3fb2c9 Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Mon, 7 Jan 2013 23:11:17 -0500 Subject: Rule extraction unit test --- python/tests/extractor/corpus.al.gz | Bin 0 -> 29803 bytes python/tests/extractor/corpus.fr-en.gz | Bin 0 -> 108160 bytes python/tests/extractor/gold-rules.sort | 140 +++++++++++++++++++++++++++++++++ python/tests/extractor/refmt.py | 20 +++++ python/tests/extractor/rules.sort | 140 +++++++++++++++++++++++++++++++++ python/tests/extractor/run.sh | 14 ++++ python/tests/extractor/test.in | 1 + python/tests/extractor/test.vis | 34 ++++++++ 8 files changed, 349 insertions(+) create mode 100644 python/tests/extractor/corpus.al.gz create mode 100644 python/tests/extractor/corpus.fr-en.gz create mode 100644 python/tests/extractor/gold-rules.sort create mode 100755 python/tests/extractor/refmt.py create mode 100644 python/tests/extractor/rules.sort create mode 100755 python/tests/extractor/run.sh create mode 100644 python/tests/extractor/test.in create mode 100644 python/tests/extractor/test.vis (limited to 'python/tests') diff --git a/python/tests/extractor/corpus.al.gz b/python/tests/extractor/corpus.al.gz new file mode 100644 index 00000000..88a3a8ee Binary files /dev/null and b/python/tests/extractor/corpus.al.gz differ diff --git a/python/tests/extractor/corpus.fr-en.gz b/python/tests/extractor/corpus.fr-en.gz new file mode 100644 index 00000000..b0b04215 Binary files /dev/null and b/python/tests/extractor/corpus.fr-en.gz differ diff --git a/python/tests/extractor/gold-rules.sort b/python/tests/extractor/gold-rules.sort new file mode 100644 index 00000000..c6f22c36 --- /dev/null +++ b/python/tests/extractor/gold-rules.sort @@ -0,0 +1,140 @@ +[X] ||| A B C D [X,1] ||| B C D [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-1 +[X] ||| A B C D ||| B C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-1 +[X] ||| A B C [X,1] K ||| B [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 4-3 4-4 +[X] ||| A B C [X,1] ||| B [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| A ||| B ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| AA BB CC DD ||| AA BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 2-1 3-0 +[X] ||| AA BB CC [X,1] ||| [X,1] BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 2-1 +[X] ||| AA BB CC ||| BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 2-0 +[X] ||| AA BB [X,1] DD ||| AA [X,1] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 3-0 +[X] ||| AA BB [X,1] ||| [X,1] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 +[X] ||| AA BB ||| CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| AA [X,1] CC DD ||| AA BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 2-1 3-0 +[X] ||| AA [X,1] CC [X,2] ||| [X,2] BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 2-1 +[X] ||| AA [X,1] CC ||| BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| AA [X,1] DD ||| AA [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| AA [X,1] ||| [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| AA ||| DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| BB CC DD ||| AA BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 2-0 +[X] ||| BB CC [X,1] ||| [X,1] BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 +[X] ||| BB CC ||| BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| BB [X,1] DD ||| AA [X,1] CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| BB [X,1] ||| [X,1] CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| BB ||| CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| CC DD ||| AA BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| CC [X,1] ||| [X,1] BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| CC ||| BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D E F G H ||| C D E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 3-5 4-5 +[X] ||| D E F [X,1] K ||| C D E F G [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 4-7 4-8 +[X] ||| D E F [X,1] ||| C D E F G [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 +[X] ||| D E F ||| C D E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 +[X] ||| D E [X,1] G H ||| C D [X,1] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 3-5 4-5 +[X] ||| D E [X,1] ||| C D [X,1] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 +[X] ||| D [X,1] F G H ||| C D E [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-5 4-5 +[X] ||| D [X,1] F [X,2] K ||| C D E [X,1] G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 4-7 4-8 +[X] ||| D [X,1] F [X,2] ||| C D E [X,1] G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 +[X] ||| D [X,1] F ||| C D E [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 +[X] ||| D [X,1] G H [X,2] ||| C D [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 3-4 +[X] ||| D [X,1] G H ||| C D [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 3-4 +[X] ||| D [X,1] I J K ||| C D [X,1] I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 4-5 4-6 +[X] ||| D [X,1] I J [X,2] ||| C D [X,1] I J [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 +[X] ||| D [X,1] I J ||| C D [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 +[X] ||| D [X,1] K ||| C D [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 2-5 +[X] ||| D [X,1] L M N ||| C D [X,1] M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 4-3 4-4 +[X] ||| D [X,1] L M [X,2] ||| C D [X,1] [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D [X,1] ||| C D [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D ||| C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| DD ||| AA ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| E F G H [X,1] ||| E F G H [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 2-3 3-3 +[X] ||| E F G H ||| E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 2-3 3-3 +[X] ||| E F [X,1] I J ||| E F G [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 3-4 4-4 +[X] ||| E F [X,1] K ||| E F G [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 3-5 3-6 +[X] ||| E F [X,1] ||| E F G [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| E F ||| E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| E [X,1] G H [X,2] ||| [X,1] F G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 2-3 3-3 +[X] ||| E [X,1] G H ||| [X,1] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 2-3 3-3 +[X] ||| E [X,1] ||| [X,1] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| E ||| F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| F ||| E ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| G H I J K ||| H I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 4-3 4-4 +[X] ||| G H I J [X,1] ||| H I J [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 +[X] ||| G H I J ||| H I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 +[X] ||| G H [X,1] K ||| H [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 3-3 3-4 +[X] ||| G H [X,1] ||| H [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| G H ||| H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| I J K ||| I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-2 2-3 +[X] ||| I J [X,1] ||| I J [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| I J ||| I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| K L M N [X,1] ||| K L M N [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 3-2 3-3 +[X] ||| K L M N ||| K L M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 3-2 3-3 +[X] ||| K L M [X,1] ||| K L [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| K ||| K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| N O P Q R ||| M N O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 1-2 2-3 4-2 +[X] ||| N O [X,1] Q R ||| M N O [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 1-2 4-2 +[X] ||| N [X,1] ||| M N [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| N ||| M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| O P Q R [X,1] ||| O P Q R S [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-1 3-0 +[X] ||| O P Q R ||| O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-1 3-0 +[X] ||| O [X,1] Q R [X,2] ||| O [X,1] Q R S [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| O [X,1] Q R ||| O [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| P ||| P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| S T U V [X,1] ||| T U V [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-0 3-2 +[X] ||| S T U V ||| T U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-0 3-2 +[X] ||| S [X,1] U V [X,2] ||| [X,1] U V [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 3-2 +[X] ||| S [X,1] U V ||| [X,1] U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 3-2 +[X] ||| T ||| T ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| W X Y Z [X,1] ||| W X Y Z [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-0 +[X] ||| W X Y Z ||| W X Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-0 +[X] ||| W X [X,1] Z [X,2] ||| W X [X,1] Z [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| W X [X,1] Z ||| W X [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| Y ||| Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| [X,1] AA BB CC DD ||| [X,1] Z AA BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 3-3 4-2 +[X] ||| [X,1] AA BB CC [X,2] ||| [X,1] Z [X,2] BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 3-3 +[X] ||| [X,1] AA BB [X,2] DD ||| [X,1] Z AA [X,2] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 4-2 +[X] ||| [X,1] AA BB [X,2] ||| [X,1] Z [X,2] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-4 2-3 +[X] ||| [X,1] AA [X,2] CC DD ||| [X,1] Z AA BB [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 3-3 4-2 +[X] ||| [X,1] AA [X,2] DD ||| [X,1] Z AA [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-4 3-2 +[X] ||| [X,1] AA [X,2] ||| [X,1] Z [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 +[X] ||| [X,1] B C D [X,2] ||| [X,1] C D [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 +[X] ||| [X,1] B C D ||| [X,1] C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 +[X] ||| [X,1] B C [X,2] K ||| [X,1] [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 4-3 4-4 +[X] ||| [X,1] BB CC DD ||| AA BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-1 3-0 +[X] ||| [X,1] BB CC [X,2] ||| [X,2] BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-1 +[X] ||| [X,1] BB CC ||| BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-0 +[X] ||| [X,1] BB [X,2] DD ||| AA [X,2] CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 3-0 +[X] ||| [X,1] BB [X,2] ||| [X,2] CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 +[X] ||| [X,1] BB ||| CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] CC DD ||| AA BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-0 +[X] ||| [X,1] CC [X,2] ||| [X,2] BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 +[X] ||| [X,1] CC ||| BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] DD ||| AA [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] E F G H ||| [X,1] D E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 3-5 4-5 +[X] ||| [X,1] E F [X,2] K ||| [X,1] D E F G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 4-7 4-8 +[X] ||| [X,1] E F [X,2] ||| [X,1] D E F G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 +[X] ||| [X,1] E F ||| [X,1] D E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 +[X] ||| [X,1] E [X,2] G H ||| [X,1] D [X,2] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 3-5 4-5 +[X] ||| [X,1] E [X,2] ||| [X,1] D [X,2] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 +[X] ||| [X,1] F G H [X,2] ||| E [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 2-3 3-3 +[X] ||| [X,1] F G H ||| E [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 2-3 3-3 +[X] ||| [X,1] F [X,2] I J ||| E [X,1] G [X,2] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 3-4 4-4 +[X] ||| [X,1] F [X,2] K ||| E [X,1] G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 3-5 3-6 +[X] ||| [X,1] F [X,2] ||| E [X,1] G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] F ||| E [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] G H I J ||| [X,1] G H I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 3-3 4-3 +[X] ||| [X,1] G H [X,2] K ||| [X,1] G H [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 4-5 4-6 +[X] ||| [X,1] G H [X,2] ||| [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 +[X] ||| [X,1] G H ||| [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 +[X] ||| [X,1] I J K ||| [X,1] I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 3-3 3-4 +[X] ||| [X,1] I J [X,2] ||| [X,1] I J [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 +[X] ||| [X,1] I J ||| [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 +[X] ||| [X,1] K L M N ||| [X,1] J K L M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 4-4 4-5 +[X] ||| [X,1] K L M [X,2] ||| [X,1] J K L [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 +[X] ||| [X,1] K ||| [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 +[X] ||| [X,1] L M N [X,2] ||| [X,1] M N [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 3-2 +[X] ||| [X,1] L M N ||| [X,1] M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 3-2 +[X] ||| [X,1] O P Q R ||| [X,1] O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-2 4-1 +[X] ||| [X,1] O [X,2] Q R ||| [X,1] O [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 4-1 +[X] ||| [X,1] S T U V ||| [X,1] Q R S T U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-6 2-4 4-6 +[X] ||| [X,1] S [X,2] U V ||| [X,1] Q R S [X,2] U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-6 4-6 +[X] ||| [X,1] W X Y Z ||| [X,1] W X Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 3-3 4-1 +[X] ||| [X,1] W X [X,2] Z ||| [X,1] W X [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 4-1 diff --git a/python/tests/extractor/refmt.py b/python/tests/extractor/refmt.py new file mode 100755 index 00000000..437d5b7a --- /dev/null +++ b/python/tests/extractor/refmt.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +import collections, sys + +lines = [] +f = collections.defaultdict(int) +fe = collections.defaultdict(lambda: collections.defaultdict(int)) + +for line in sys.stdin: + tok = [x.strip() for x in line.split('|||')] + count = int(tok[4]) + f[tok[1]] += count + fe[tok[1]][tok[2]] += count + lines.append(tok) + +for tok in lines: + feat = 'IsSingletonF={0}.0 IsSingletonFE={1}.0'.format( + 0 if f[tok[1]] > 1 else 1, + 0 if fe[tok[1]][tok[2]] > 1 else 1) + print ' ||| '.join((tok[0], tok[1], tok[2], feat, tok[3])) diff --git a/python/tests/extractor/rules.sort b/python/tests/extractor/rules.sort new file mode 100644 index 00000000..c6f22c36 --- /dev/null +++ b/python/tests/extractor/rules.sort @@ -0,0 +1,140 @@ +[X] ||| A B C D [X,1] ||| B C D [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-1 +[X] ||| A B C D ||| B C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-1 +[X] ||| A B C [X,1] K ||| B [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 4-3 4-4 +[X] ||| A B C [X,1] ||| B [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| A ||| B ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| AA BB CC DD ||| AA BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 2-1 3-0 +[X] ||| AA BB CC [X,1] ||| [X,1] BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 2-1 +[X] ||| AA BB CC ||| BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 2-0 +[X] ||| AA BB [X,1] DD ||| AA [X,1] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 1-2 3-0 +[X] ||| AA BB [X,1] ||| [X,1] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 +[X] ||| AA BB ||| CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| AA [X,1] CC DD ||| AA BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 2-1 3-0 +[X] ||| AA [X,1] CC [X,2] ||| [X,2] BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-3 2-1 +[X] ||| AA [X,1] CC ||| BB [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| AA [X,1] DD ||| AA [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| AA [X,1] ||| [X,1] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| AA ||| DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| BB CC DD ||| AA BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 2-0 +[X] ||| BB CC [X,1] ||| [X,1] BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-1 +[X] ||| BB CC ||| BB CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| BB [X,1] DD ||| AA [X,1] CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 2-0 +[X] ||| BB [X,1] ||| [X,1] CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| BB ||| CC ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| CC DD ||| AA BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| CC [X,1] ||| [X,1] BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| CC ||| BB ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D E F G H ||| C D E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 3-5 4-5 +[X] ||| D E F [X,1] K ||| C D E F G [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 4-7 4-8 +[X] ||| D E F [X,1] ||| C D E F G [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 +[X] ||| D E F ||| C D E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 2-2 +[X] ||| D E [X,1] G H ||| C D [X,1] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 3-5 4-5 +[X] ||| D E [X,1] ||| C D [X,1] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-3 +[X] ||| D [X,1] F G H ||| C D E [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-5 4-5 +[X] ||| D [X,1] F [X,2] K ||| C D E [X,1] G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 4-7 4-8 +[X] ||| D [X,1] F [X,2] ||| C D E [X,1] G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 +[X] ||| D [X,1] F ||| C D E [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 +[X] ||| D [X,1] G H [X,2] ||| C D [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 3-4 +[X] ||| D [X,1] G H ||| C D [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 3-4 +[X] ||| D [X,1] I J K ||| C D [X,1] I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 4-5 4-6 +[X] ||| D [X,1] I J [X,2] ||| C D [X,1] I J [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 +[X] ||| D [X,1] I J ||| C D [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-3 3-3 +[X] ||| D [X,1] K ||| C D [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-4 2-5 +[X] ||| D [X,1] L M N ||| C D [X,1] M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 4-3 4-4 +[X] ||| D [X,1] L M [X,2] ||| C D [X,1] [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D [X,1] ||| C D [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| D ||| C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| DD ||| AA ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| E F G H [X,1] ||| E F G H [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 2-3 3-3 +[X] ||| E F G H ||| E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 2-3 3-3 +[X] ||| E F [X,1] I J ||| E F G [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 3-4 4-4 +[X] ||| E F [X,1] K ||| E F G [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 3-5 3-6 +[X] ||| E F [X,1] ||| E F G [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| E F ||| E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 1-0 +[X] ||| E [X,1] G H [X,2] ||| [X,1] F G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 2-3 3-3 +[X] ||| E [X,1] G H ||| [X,1] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 2-3 3-3 +[X] ||| E [X,1] ||| [X,1] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-1 +[X] ||| E ||| F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| F ||| E ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| G H I J K ||| H I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 4-3 4-4 +[X] ||| G H I J [X,1] ||| H I J [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 +[X] ||| G H I J ||| H I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-1 3-1 +[X] ||| G H [X,1] K ||| H [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 3-3 3-4 +[X] ||| G H [X,1] ||| H [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| G H ||| H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| I J K ||| I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 2-2 2-3 +[X] ||| I J [X,1] ||| I J [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| I J ||| I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-0 +[X] ||| K L M N [X,1] ||| K L M N [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 3-2 3-3 +[X] ||| K L M N ||| K L M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 3-2 3-3 +[X] ||| K L M [X,1] ||| K L [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| K ||| K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| N O P Q R ||| M N O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 1-2 2-3 4-2 +[X] ||| N O [X,1] Q R ||| M N O [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 1-2 4-2 +[X] ||| N [X,1] ||| M N [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| N ||| M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 0-1 +[X] ||| O P Q R [X,1] ||| O P Q R S [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-1 3-0 +[X] ||| O P Q R ||| O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 1-1 3-0 +[X] ||| O [X,1] Q R [X,2] ||| O [X,1] Q R S [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| O [X,1] Q R ||| O [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| P ||| P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| S T U V [X,1] ||| T U V [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-0 3-2 +[X] ||| S T U V ||| T U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 1-0 3-2 +[X] ||| S [X,1] U V [X,2] ||| [X,1] U V [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 3-2 +[X] ||| S [X,1] U V ||| [X,1] U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-2 3-2 +[X] ||| T ||| T ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| W X Y Z [X,1] ||| W X Y Z [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-0 +[X] ||| W X Y Z ||| W X Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 2-2 3-0 +[X] ||| W X [X,1] Z [X,2] ||| W X [X,1] Z [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| W X [X,1] Z ||| W X [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 3-0 +[X] ||| Y ||| Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 0-0 +[X] ||| [X,1] AA BB CC DD ||| [X,1] Z AA BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 3-3 4-2 +[X] ||| [X,1] AA BB CC [X,2] ||| [X,1] Z [X,2] BB CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 3-3 +[X] ||| [X,1] AA BB [X,2] DD ||| [X,1] Z AA [X,2] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 2-4 4-2 +[X] ||| [X,1] AA BB [X,2] ||| [X,1] Z [X,2] CC DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-4 2-3 +[X] ||| [X,1] AA [X,2] CC DD ||| [X,1] Z AA BB [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-5 3-3 4-2 +[X] ||| [X,1] AA [X,2] DD ||| [X,1] Z AA [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-4 3-2 +[X] ||| [X,1] AA [X,2] ||| [X,1] Z [X,2] DD ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 +[X] ||| [X,1] B C D [X,2] ||| [X,1] C D [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 +[X] ||| [X,1] B C D ||| [X,1] C ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 +[X] ||| [X,1] B C [X,2] K ||| [X,1] [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 4-3 4-4 +[X] ||| [X,1] BB CC DD ||| AA BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-1 3-0 +[X] ||| [X,1] BB CC [X,2] ||| [X,2] BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-1 +[X] ||| [X,1] BB CC ||| BB CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-0 +[X] ||| [X,1] BB [X,2] DD ||| AA [X,2] CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 3-0 +[X] ||| [X,1] BB [X,2] ||| [X,2] CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 +[X] ||| [X,1] BB ||| CC [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] CC DD ||| AA BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-0 +[X] ||| [X,1] CC [X,2] ||| [X,2] BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 +[X] ||| [X,1] CC ||| BB [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] DD ||| AA [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] E F G H ||| [X,1] D E F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 3-5 4-5 +[X] ||| [X,1] E F [X,2] K ||| [X,1] D E F G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 4-7 4-8 +[X] ||| [X,1] E F [X,2] ||| [X,1] D E F G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 +[X] ||| [X,1] E F ||| [X,1] D E F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 2-2 +[X] ||| [X,1] E [X,2] G H ||| [X,1] D [X,2] F G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 3-5 4-5 +[X] ||| [X,1] E [X,2] ||| [X,1] D [X,2] F ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-3 +[X] ||| [X,1] F G H [X,2] ||| E [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 2-3 3-3 +[X] ||| [X,1] F G H ||| E [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 2-3 3-3 +[X] ||| [X,1] F [X,2] I J ||| E [X,1] G [X,2] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 3-4 4-4 +[X] ||| [X,1] F [X,2] K ||| E [X,1] G [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 3-5 3-6 +[X] ||| [X,1] F [X,2] ||| E [X,1] G [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] F ||| E [X,1] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-0 +[X] ||| [X,1] G H I J ||| [X,1] G H I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 3-3 4-3 +[X] ||| [X,1] G H [X,2] K ||| [X,1] G H [X,2] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 4-5 4-6 +[X] ||| [X,1] G H [X,2] ||| [X,1] G H [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 +[X] ||| [X,1] G H ||| [X,1] G H ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 2-2 +[X] ||| [X,1] I J K ||| [X,1] I J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 3-3 3-4 +[X] ||| [X,1] I J [X,2] ||| [X,1] I J [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 +[X] ||| [X,1] I J ||| [X,1] I ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-1 +[X] ||| [X,1] K L M N ||| [X,1] J K L M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 4-4 4-5 +[X] ||| [X,1] K L M [X,2] ||| [X,1] J K L [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 +[X] ||| [X,1] K ||| [X,1] J K L ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-2 1-3 +[X] ||| [X,1] L M N [X,2] ||| [X,1] M N [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 3-2 +[X] ||| [X,1] L M N ||| [X,1] M N ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 3-1 3-2 +[X] ||| [X,1] O P Q R ||| [X,1] O P ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 2-2 4-1 +[X] ||| [X,1] O [X,2] Q R ||| [X,1] O [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 4-1 +[X] ||| [X,1] S T U V ||| [X,1] Q R S T U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-6 2-4 4-6 +[X] ||| [X,1] S [X,2] U V ||| [X,1] Q R S [X,2] U V ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-6 4-6 +[X] ||| [X,1] W X Y Z ||| [X,1] W X Y ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 3-3 4-1 +[X] ||| [X,1] W X [X,2] Z ||| [X,1] W X [X,2] ||| IsSingletonF=1.0 IsSingletonFE=1.0 ||| 1-1 4-1 diff --git a/python/tests/extractor/run.sh b/python/tests/extractor/run.sh new file mode 100755 index 00000000..f44da9f8 --- /dev/null +++ b/python/tests/extractor/run.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Make sure that the sa and online extractors are producing the same (correct) output + +set -x verbose + +python -m cdec.sa.compile -a corpus.al.gz -b corpus.fr-en.gz -o extract >| extract.ini + +cat test.in | python -m cdec.sa.extract -c extract.ini -g gold -o 2>&1 | egrep '\[X\].+\|\|\|.+\|\|\|.+\|\|\|.+\|\|\|'|sed -re 's/INFO.+://g' | ./refmt.py | LC_ALL=C sort >| rules.sort + +cd gold && cat grammar.0|sed -re 's/Egiv.+(IsSingletonF=)/\1/g'|LC_ALL=C sort >| rules.sort && cd .. + +diff gold/rules.sort gold-rules.sort +diff rules.sort gold-rules.sort diff --git a/python/tests/extractor/test.in b/python/tests/extractor/test.in new file mode 100644 index 00000000..42f1a9dc --- /dev/null +++ b/python/tests/extractor/test.in @@ -0,0 +1 @@ +A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC DD ||| A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC DD ||| 0-1 3-2 4-5 5-4 6-7 7-7 8-8 9-8 10-10 10-11 13-12 13-13 14-14 15-15 17-14 18-21 19-19 21-21 22-22 24-24 25-22 26-29 27-28 28-27 29-26 diff --git a/python/tests/extractor/test.vis b/python/tests/extractor/test.vis new file mode 100644 index 00000000..2d30a9f4 --- /dev/null +++ b/python/tests/extractor/test.vis @@ -0,0 +1,34 @@ + 012345678901234567890123456789 +0.*............................0 +1..............................1 +2..............................2 +3..*...........................3 +4.....*........................4 +5....*.........................5 +6.......*......................6 +7.......*......................7 +8........*.....................8 +9........*.....................9 +0..........**..................0 +1..............................1 +2..............................2 +3............**................3 +4..............*...............4 +5...............*..............5 +6..............................6 +7..............*...............7 +8.....................*........8 +9...................*..........9 +0..............................0 +1.....................*........1 +2......................*.......2 +3..............................3 +4........................*.....4 +5......................*.......5 +6.............................*6 +7............................*.7 +8...........................*..8 +9..........................*...9 + 012345678901234567890123456789 + +0-1 3-2 4-5 5-4 6-7 7-7 8-8 9-8 10-10 10-11 13-12 13-13 14-14 15-15 17-14 18-21 19-19 21-21 22-22 24-24 25-22 26-29 27-28 28-27 29-26 -- cgit v1.2.3