summaryrefslogtreecommitdiff
path: root/training/dtrain/test
diff options
context:
space:
mode:
Diffstat (limited to 'training/dtrain/test')
-rw-r--r--training/dtrain/test/example/README8
-rw-r--r--training/dtrain/test/example/cdec.ini25
-rw-r--r--training/dtrain/test/example/dtrain.ini22
-rw-r--r--training/dtrain/test/example/expected-output89
-rw-r--r--training/dtrain/test/parallelize/README5
-rw-r--r--training/dtrain/test/parallelize/cdec.ini22
-rw-r--r--training/dtrain/test/parallelize/dtrain.ini15
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.0.gzbin8318 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.1.gzbin358560 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.2.gzbin1014466 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.3.gzbin391811 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.4.gzbin149590 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.5.gzbin537024 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.6.gzbin291286 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.7.gzbin1038140 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.8.gzbin419889 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/g/grammar.out.9.gzbin409140 -> 0 bytes
-rw-r--r--training/dtrain/test/parallelize/in10
-rw-r--r--training/dtrain/test/parallelize/refs10
-rw-r--r--training/dtrain/test/toy/cdec.ini2
-rw-r--r--training/dtrain/test/toy/dtrain.ini12
-rw-r--r--training/dtrain/test/toy/input2
22 files changed, 0 insertions, 222 deletions
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
deleted file mode 100644
index 2df77086..00000000
--- a/training/dtrain/test/example/README
+++ /dev/null
@@ -1,8 +0,0 @@
-Small example of input format for distributed training.
-Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini .
-
-For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
-and recompile.
-
-data can be found here: http://simianer.de/#dtrain
-
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
deleted file mode 100644
index 0215416d..00000000
--- a/training/dtrain/test/example/cdec.ini
+++ /dev/null
@@ -1,25 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-feature_function=WordPenalty
-feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
deleted file mode 100644
index 97fce7f0..00000000
--- a/training/dtrain/test/example/dtrain.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-input=./nc-wmt11.1k.gz # use '-' for STDIN
-output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=VOID # don't output weights
-decoder_config=./cdec.ini # config for cdec
-# weights for these features will be printed on each iteration
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-stop_after=10 # stop epoch after 10 inputs
-
-# interesting stuff
-epochs=2 # run over input 2 times
-k=100 # use 100best lists
-N=4 # optimize (approx) BLEU4
-scorer=stupid_bleu # use 'stupid' BLEU+1
-learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron)
-gamma=0 # use SVM reg
-sample_from=kbest # use kbest lists (as opposed to forest)
-filter=uniq # only unique entries in kbest (surface form)
-pair_sampling=XYX
-hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
-pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
-loss_margin=0
diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
deleted file mode 100644
index 05326763..00000000
--- a/training/dtrain/test/example/expected-output
+++ /dev/null
@@ -1,89 +0,0 @@
- cdec cfg 'test/example/cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading test/example/nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
- Example feature: Shape_S00000_T00000
-Seeding random number sequence to 2912000813
-
-dtrain
-Parameters:
- k 100
- N 4
- T 2
- scorer 'stupid_bleu'
- sample from 'kbest'
- filter 'uniq'
- learning rate 1
- gamma 0
- loss margin 0
- pairs 'XYX'
- hi lo 0.1
- pair threshold 0
- select weights 'VOID'
- l1 reg 0 'none'
- max pairs 4294967295
- cdec cfg 'test/example/cdec.ini'
- input 'test/example/nc-wmt11.1k.gz'
- output '-'
- stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 2.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
- Glue = -637
- WordPenalty = +1064
- LanguageModel = +1175.3
- LanguageModel_OOV = -1437
- PhraseModel_0 = +1935.6
- PhraseModel_1 = +2499.3
- PhraseModel_2 = +964.96
- PhraseModel_3 = +1410.8
- PhraseModel_4 = -5977.9
- PhraseModel_5 = +522
- PhraseModel_6 = +1089
- PassThrough = -1308
- ---
- 1best avg score: 0.16963 (+0.16963)
- 1best avg model score: 64485 (+64485)
- avg # pairs: 1494.4
- avg # rank err: 702.6
- avg # margin viol: 0
- non0 feature count: 528
- avg list sz: 85.7
- avg f count: 102.75
-(time 0.083 min, 0.5 s/S)
-
-Iteration #2 of 2.
- . 10
-WEIGHTS
- Glue = -1196
- WordPenalty = +809.52
- LanguageModel = +3112.1
- LanguageModel_OOV = -1464
- PhraseModel_0 = +3895.5
- PhraseModel_1 = +4683.4
- PhraseModel_2 = +1092.8
- PhraseModel_3 = +1079.6
- PhraseModel_4 = -6827.7
- PhraseModel_5 = -888
- PhraseModel_6 = +142
- PassThrough = -1335
- ---
- 1best avg score: 0.277 (+0.10736)
- 1best avg model score: -3110.5 (-67595)
- avg # pairs: 1144.2
- avg # rank err: 529.1
- avg # margin viol: 0
- non0 feature count: 859
- avg list sz: 74.9
- avg f count: 112.84
-(time 0.067 min, 0.4 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 2 [SCORE 'stupid_bleu'=0.277].
-This took 0.15 min.
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README
deleted file mode 100644
index 89715105..00000000
--- a/training/dtrain/test/parallelize/README
+++ /dev/null
@@ -1,5 +0,0 @@
-run for example
- ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
-
-final weights will be in the file work/weights.3
-
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
deleted file mode 100644
index e43ba1c4..00000000
--- a/training/dtrain/test/parallelize/cdec.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-scfg_max_span_limit=15
-feature_function=WordPenalty
-feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-#feature_function=RuleIdentityFeatures
-#feature_function=RuleNgramFeatures
-#feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini
deleted file mode 100644
index 03f9d240..00000000
--- a/training/dtrain/test/parallelize/dtrain.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-k=100
-N=4
-learning_rate=0.0001
-gamma=0
-loss_margin=0
-epochs=1
-scorer=stupid_bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=XYX
-hi_lo=0.1
-select_weights=last
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-decoder_config=cdec.ini
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz
deleted file mode 100644
index 1e28a24b..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.0.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz
deleted file mode 100644
index 372f5675..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.1.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz
deleted file mode 100644
index 145d0dc0..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.2.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz
deleted file mode 100644
index 105593ff..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.3.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz
deleted file mode 100644
index 30781f48..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.4.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz
deleted file mode 100644
index 834ee759..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.5.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz
deleted file mode 100644
index 2e76f348..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.6.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz
deleted file mode 100644
index 3741a887..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.7.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz
deleted file mode 100644
index ebf6bd0c..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.8.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz
deleted file mode 100644
index c1791059..00000000
--- a/training/dtrain/test/parallelize/g/grammar.out.9.gz
+++ /dev/null
Binary files differ
diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
deleted file mode 100644
index 3b7dec39..00000000
--- a/training/dtrain/test/parallelize/in
+++ /dev/null
@@ -1,10 +0,0 @@
-<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
-<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
-<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
-<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
-<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
-<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
-<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
-<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
-<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
-<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs
deleted file mode 100644
index 632e27b0..00000000
--- a/training/dtrain/test/parallelize/refs
+++ /dev/null
@@ -1,10 +0,0 @@
-europe 's divided racial house
-a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
-the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
-while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
-an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
-mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
-it will not , as america 's racial history clearly shows .
-race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
-the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
-this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini
deleted file mode 100644
index 98b02d44..00000000
--- a/training/dtrain/test/toy/cdec.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini
deleted file mode 100644
index a091732f..00000000
--- a/training/dtrain/test/toy/dtrain.ini
+++ /dev/null
@@ -1,12 +0,0 @@
-decoder_config=test/toy/cdec.ini
-input=test/toy/input
-output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
-k=4
-N=4
-epochs=2
-scorer=bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=all
-learning_rate=1
diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input
deleted file mode 100644
index 4d10a9ea..00000000
--- a/training/dtrain/test/toy/input
+++ /dev/null
@@ -1,2 +0,0 @@
-0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
-1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0