fixed conflicts

author: Avneesh Saluja <asaluja@gmail.com> 2013-03-28 18:28:16 -0700
committer: Avneesh Saluja <asaluja@gmail.com> 2013-03-28 18:28:16 -0700
commit: 5b8253e0e1f1393a509fb9975ba8c1347af758ed (patch)
tree: 1790470b1d07a0b4973ebce19192e896566ea60b /dtrain/test
parent: 2389a5a8a43dda87c355579838559515b0428421 (diff)
parent: b203f8c5dc8cff1b9c9c2073832b248fcad0765a (diff)
7 files changed, 0 insertions, 195 deletions
diff --git a/dtrain/test/example/README b/dtrain/test/example/README
deleted file mode 100644
index 6937b11b..00000000
--- a/dtrain/test/example/README
+++ /dev/null
@@ -1,8 +0,0 @@
-Small example of input format for distributed training.
-Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
-
-For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
-and recompile.
-
-Data is here: http://simianer.de/#dtrain
-
diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini
deleted file mode 100644
index 6642107f..00000000
--- a/dtrain/test/example/cdec.ini
+++ /dev/null
@@ -1,24 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=30
-feature_function=WordPenalty
-feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleNgramFeatures
-feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
deleted file mode 100644
index c8ac7c3f..00000000
--- a/dtrain/test/example/dtrain.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-input=test/example/nc-wmt11.1k.gz    # use '-' for STDIN
-output=-                             # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=VOID     # don't output weights
-decoder_config=test/example/cdec.ini # config for cdec
-# weights for these features will be printed on each iteration
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-stop_after=10 # stop epoch after 10 inputs
-
-# interesting stuff
-epochs=3                # run over input 3 times
-k=100                   # use 100best lists
-N=4                     # optimize (approx) BLEU4
-scorer=stupid_bleu      # use 'stupid' BLEU+1
-learning_rate=0.0001    # learning rate
-gamma=0                 # use SVM reg
-sample_from=kbest       # use kbest lists (as opposed to forest)
-filter=uniq             # only unique entries in kbest (surface form)
-pair_sampling=XYX
-hi_lo=0.1               # 10 vs 80 vs 10 and 80 vs 10 here
-pair_threshold=0        # minimum distance in BLEU (this will still only use pairs with diff > 0)
-loss_margin=0
diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output
deleted file mode 100644
index 25d2c069..00000000
--- a/dtrain/test/example/expected-output
+++ /dev/null
@@ -1,125 +0,0 @@
-                cdec cfg 'test/example/cdec.ini'
-feature: WordPenalty (no config parameters)
-State is 0 bytes for feature WordPenalty
-feature: KLanguageModel (with config parameters 'test/example/nc-wmt11.en.srilm.gz')
-Loading the LM will be faster if you build a binary file.
-Reading test/example/nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
-Loaded 5-gram KLM from test/example/nc-wmt11.en.srilm.gz (MapSize=49581)
-State is 98 bytes for feature KLanguageModel test/example/nc-wmt11.en.srilm.gz
-feature: RuleIdentityFeatures (no config parameters)
-State is 0 bytes for feature RuleIdentityFeatures
-feature: RuleNgramFeatures (no config parameters)
-State is 0 bytes for feature RuleNgramFeatures
-feature: RuleShape (no config parameters)
-  Example feature: Shape_S00000_T00000
-State is 0 bytes for feature RuleShape
-Seeding random number sequence to 1072059181
-
-dtrain
-Parameters:
-                       k 100
-                       N 4
-                       T 3
-                 scorer 'stupid_bleu'
-             sample from 'kbest'
-                  filter 'uniq'
-           learning rate 0.0001
-                   gamma 0
-             loss margin 0
-                   pairs 'XYX'
-                   hi lo 0.1
-          pair threshold 0
-          select weights 'VOID'
-                  l1 reg 0 'none'
-                cdec cfg 'test/example/cdec.ini'
-                   input 'test/example/nc-wmt11.1k.gz'
-                  output '-'
-              stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 3.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
-              Glue = -0.0293
-       WordPenalty = +0.049075
-     LanguageModel = +0.24345
- LanguageModel_OOV = -0.2029
-     PhraseModel_0 = +0.0084102
-     PhraseModel_1 = +0.021729
-     PhraseModel_2 = +0.014922
-     PhraseModel_3 = +0.104
-     PhraseModel_4 = -0.14308
-     PhraseModel_5 = +0.0247
-     PhraseModel_6 = -0.012
-       PassThrough = -0.2161
-        ---
-       1best avg score: 0.16872 (+0.16872)
- 1best avg model score: -1.8276 (-1.8276)
-           avg # pairs: 1121.1
-        avg # rank err: 555.6
-     avg # margin viol: 0
-    non0 feature count: 277
-           avg list sz: 77.2
-           avg f count: 90.96
-(time 0.1 min, 0.6 s/S)
-
-Iteration #2 of 3.
- . 10
-WEIGHTS
-              Glue = -0.3526
-       WordPenalty = +0.067576
-     LanguageModel = +1.155
- LanguageModel_OOV = -0.2728
-     PhraseModel_0 = -0.025529
-     PhraseModel_1 = +0.095869
-     PhraseModel_2 = +0.094567
-     PhraseModel_3 = +0.12482
-     PhraseModel_4 = -0.36533
-     PhraseModel_5 = +0.1068
-     PhraseModel_6 = -0.1517
-       PassThrough = -0.286
-        ---
-       1best avg score: 0.18394 (+0.015221)
- 1best avg model score: 3.205 (+5.0326)
-           avg # pairs: 1168.3
-        avg # rank err: 594.8
-     avg # margin viol: 0
-    non0 feature count: 543
-           avg list sz: 77.5
-           avg f count: 85.916
-(time 0.083 min, 0.5 s/S)
-
-Iteration #3 of 3.
- . 10
-WEIGHTS
-              Glue = -0.392
-       WordPenalty = +0.071963
-     LanguageModel = +0.81266
- LanguageModel_OOV = -0.4177
-     PhraseModel_0 = -0.2649
-     PhraseModel_1 = -0.17931
-     PhraseModel_2 = +0.038261
-     PhraseModel_3 = +0.20261
-     PhraseModel_4 = -0.42621
-     PhraseModel_5 = +0.3198
-     PhraseModel_6 = -0.1437
-       PassThrough = -0.4309
-        ---
-       1best avg score: 0.2962 (+0.11225)
- 1best avg model score: -36.274 (-39.479)
-           avg # pairs: 1109.6
-        avg # rank err: 515.9
-     avg # margin viol: 0
-    non0 feature count: 741
-           avg list sz: 77
-           avg f count: 88.982
-(time 0.083 min, 0.5 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 3 [SCORE 'stupid_bleu'=0.2962].
-This took 0.26667 min.
diff --git a/dtrain/test/toy/cdec.ini b/dtrain/test/toy/cdec.ini
deleted file mode 100644
index 98b02d44..00000000
--- a/dtrain/test/toy/cdec.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini
deleted file mode 100644
index a091732f..00000000
--- a/dtrain/test/toy/dtrain.ini
+++ /dev/null
@@ -1,12 +0,0 @@
-decoder_config=test/toy/cdec.ini
-input=test/toy/input
-output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
-k=4
-N=4
-epochs=2
-scorer=bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=all
-learning_rate=1
diff --git a/dtrain/test/toy/input b/dtrain/test/toy/input
deleted file mode 100644
index 4d10a9ea..00000000
--- a/dtrain/test/toy/input
+++ /dev/null
@@ -1,2 +0,0 @@
-0	ich sah ein kleines haus	i saw a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0	[NP] ||| ich ||| i ||| logp=0	[NP] ||| ein [NN,1] ||| a [1] ||| logp=0	[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1	[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1	[JJ] ||| kleines ||| small ||| logp=0 small_rule=1	[JJ] ||| kleines ||| little ||| logp=0 little_rule=1	[JJ] ||| grosses ||| big ||| logp=0	[JJ] ||| grosses ||| large ||| logp=0	[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0	[V] ||| sah ||| saw ||| logp=0	[V] ||| fand ||| found ||| logp=0
-1	ich fand ein kleines haus	i found a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0	[NP] ||| ich ||| i ||| logp=0	[NP] ||| ein [NN,1] ||| a [1] ||| logp=0	[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1	[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1	[JJ] ||| kleines ||| small ||| logp=0 small_rule=1	[JJ] ||| kleines ||| little ||| logp=0 little_rule=1	[JJ] ||| grosses ||| big ||| logp=0	[JJ] ||| grosses ||| large ||| logp=0	[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0	[V] ||| sah ||| saw ||| logp=0	[V] ||| fand ||| found ||| logp=0
author	Avneesh Saluja <asaluja@gmail.com>	2013-03-28 18:28:16 -0700
committer	Avneesh Saluja <asaluja@gmail.com>	2013-03-28 18:28:16 -0700
commit	5b8253e0e1f1393a509fb9975ba8c1347af758ed (patch)
tree	1790470b1d07a0b4973ebce19192e896566ea60b /dtrain/test
parent	2389a5a8a43dda87c355579838559515b0428421 (diff)
parent	b203f8c5dc8cff1b9c9c2073832b248fcad0765a (diff)