summaryrefslogtreecommitdiff
path: root/training/dtrain/examples/toy
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-03-15 11:31:18 +0100
committerPatrick Simianer <p@simianer.de>2013-03-15 11:31:18 +0100
commitcf67d34738e1487f75739dc1e027b1864a06513b (patch)
tree2080f05433457f84280665e8bec27a2ec9dc259a /training/dtrain/examples/toy
parent2b4b3adc764085bccc6ddbde96b8cc7ba4287a9f (diff)
overhauled ruby scripts and examples
Diffstat (limited to 'training/dtrain/examples/toy')
-rw-r--r--training/dtrain/examples/toy/cdec.ini3
-rw-r--r--training/dtrain/examples/toy/dtrain.ini13
-rw-r--r--training/dtrain/examples/toy/expected-output77
-rw-r--r--training/dtrain/examples/toy/grammar.gzbin0 -> 219 bytes
-rw-r--r--training/dtrain/examples/toy/src2
-rw-r--r--training/dtrain/examples/toy/tgt2
6 files changed, 97 insertions, 0 deletions
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
new file mode 100644
index 00000000..b14f4819
--- /dev/null
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -0,0 +1,3 @@
+formalism=scfg
+add_pass_through_rules=true
+grammar=grammar.gz
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
new file mode 100644
index 00000000..cd715f26
--- /dev/null
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -0,0 +1,13 @@
+decoder_config=cdec.ini
+input=src
+refs=tgt
+output=-
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+k=4
+N=4
+epochs=2
+scorer=bleu
+sample_from=kbest
+filter=uniq
+pair_sampling=all
+learning_rate=1
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
new file mode 100644
index 00000000..1da2aadd
--- /dev/null
+++ b/training/dtrain/examples/toy/expected-output
@@ -0,0 +1,77 @@
+Warning: hi_lo only works with pair_sampling XYX.
+ cdec cfg 'cdec.ini'
+Seeding random number sequence to 1664825829
+
+dtrain
+Parameters:
+ k 4
+ N 4
+ T 2
+ scorer 'bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'all'
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'src'
+ refs 'tgt'
+ output '-'
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 0.5 (+0.5)
+ 1best avg model score: 2.5 (+2.5)
+ avg # pairs: 4
+ avg # rank err: 1.5
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 2.875
+(time 0 min, 0 s/S)
+
+Iteration #2 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 1 (+0.5)
+ 1best avg model score: 5 (+2.5)
+ avg # pairs: 5
+ avg # rank err: 0
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 3
+(time 0 min, 0 s/S)
+
+Writing weights file to '-' ...
+house_rule 2
+little_rule 3
+Glue -4
+PassThrough -5
+small_rule -2
+shell_rule -1
+done
+
+---
+Best iteration: 2 [SCORE 'bleu'=1].
+This took 0 min.
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz
new file mode 100644
index 00000000..8eb0d29e
--- /dev/null
+++ b/training/dtrain/examples/toy/grammar.gz
Binary files differ
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
new file mode 100644
index 00000000..87e39ef2
--- /dev/null
+++ b/training/dtrain/examples/toy/src
@@ -0,0 +1,2 @@
+ich sah ein kleines haus
+ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
new file mode 100644
index 00000000..174926b3
--- /dev/null
+++ b/training/dtrain/examples/toy/tgt
@@ -0,0 +1,2 @@
+i saw a little house
+i found a little house